diff --git a/.gitignore b/.gitignore
index 5afe375f46f07b3b557ae23f75740b337517d3bd..1ef4c297ee4f369775c13b32a46a55887de719e7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ __pycache__
*.swp
.vscode/
cmake_build/
+tensorflow/contrib/cmake/_build/
.idea/**
/build/
[Bb]uild/
@@ -30,6 +31,7 @@ Podfile.lock
xcuserdata/**
/api_init_files_list.txt
/estimator_api_init_files_list.txt
+*.whl
# Android
.gradle
diff --git a/CODEOWNERS b/CODEOWNERS
index b9f0313cc6d59d3fbdcd014e1a528126d863075a..94cc865479cd6ab5cdb589490d3a2d650f06b160 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -1,53 +1,67 @@
-# NOTE: Disabled temporarily because it's too noisy on pushes.
# Where component owners are known, add them here.
-# /tensorflow/core/platform/windows/ @mrry
-# /tensorflow/java/ @asimshankar
-# /tensorflow/tensorboard/ @jart @dandelionmane
-# /tensorflow/tools/docs/ @markdaoust
+/tensorflow/core/debug @caisq
+/tensorflow/core/platform/windows/ @mrry
+/tensorflow/core/platform/s3 @yongtang
+/tensorflow/go @asimshankar
+/tensorflow/java/ @asimshankar
+/tensorflow/python/debug @caisq
+/tensorflow/python/tools/api/generator/ @annarev
+/tensorflow/tensorboard/ @jart
+/tensorflow/tools/docs/ @markdaoust
# contrib
-# NEED OWNER: /tensorflow/contrib/avro/
-# /tensorflow/contrib/batching/ @alextp @chrisolston
-# /tensorflow/contrib/bayesflow/ @ebrevdo @rsepassi @jvdillon
-# /tensorflow/contrib/boosted_trees/ @sshrdp @yk5 @nataliaponomareva
-# /tensorflow/contrib/cmake/ @mrry @benoitsteiner
-# /tensorflow/contrib/copy_graph/ @tucker @poxvoculi
-# /tensorflow/contrib/crf/ @kentonl
-# /tensorflow/contrib/data/ @mrry
-# /tensorflow/contrib/distributions/ @jvdillon @langmore @rsepassi
-# /tensorflow/contrib/factorization/ @agarwal-ashish @xavigonzalvo
-# /tensorflow/contrib/ffmpeg/ @fredbertsch
-# NEED OWNER: /tensorflow/contrib/framework/
-# /tensorflow/contrib/graph_editor/ @purpledog
+# NEED OWNER: /tensorflow/contrib/all_reduce
+/tensorflow/contrib/batching/ @alextp @chrisolston
+/tensorflow/contrib/bayesflow/ @ebrevdo @rsepassi @jvdillon
+/tensorflow/contrib/boosted_trees/ @sshrdp @yk5 @nataliaponomareva
+/tensorflow/contrib/checkpoint/ @allenlavoie
+/tensorflow/contrib/cluster_resolver/ @frankchn
+/tensorflow/contrib/cmake/ @mrry
+/tensorflow/contrib/copy_graph/ @tucker @poxvoculi
+/tensorflow/contrib/crf/ @kentonl
+/tensorflow/contrib/data/ @mrry
+/tensorflow/contrib/distribute @joshl @priyag @sourabhbajaj @frankchn
+/tensorflow/contrib/distributions/ @jvdillon @langmore @rsepassi
+/tensorflow/contrib/eager @alextp @asimshankar
+/tensorflow/contrib/factorization/ @agarwal-ashish @xavigonzalvo
+/tensorflow/contrib/ffmpeg/ @fredbertsch
+/tensorflow/contrib/framework/ @ebrevdo
+/tensorflow/contrib/gan/ @joel-shor
+/tensorflow/contrib/graph_editor/ @purpledog
# NEED OWNER: /tensorflow/contrib/grid_rnn/
-# /tensorflow/contrib/hvx/ @satok16
-# /tensorflow/contrib/integrate/ @shoyer
-# /tensorflow/contrib/kernel_methods/ @petrosmol
-# /tensorflow/contrib/ios_examples/ @petewarden
-# /tensorflow/contrib/labeled_tensor/ @shoyer
-# /tensorflow/contrib/layers/ @fchollet @martinwicke
-# /tensorflow/contrib/learn/ @martinwicke @ispirmustafa @alextp
-# /tensorflow/contrib/linalg/ @langmore
-# /tensorflow/contrib/linear_optimizer/ @petrosmol @andreasst @katsiapis
-# /tensorflow/contrib/lookup/ @ysuematsu @andreasst
-# /tensorflow/contrib/losses/ @alextp @ispirmustafa
-# /tensorflow/contrib/makefile/ @petewarden @satok16 @wolffg
-# /tensorflow/contrib/metrics/ @alextp @honkentuber @ispirmustafa
-# /tensorflow/contrib/nccl/ @cwhipkey @zheng-xq
-# /tensorflow/contrib/opt/ @strategist333
-# /tensorflow/contrib/pi_examples/ @maciekcc
-# /tensorflow/contrib/quantization/ @petewarden @cwhipkey @keveman
-# /tensorflow/contrib/rnn/ @ebrevdo
-# /tensorflow/contrib/saved_model/ @nfiedel @sukritiramesh
-# /tensorflow/contrib/seq2seq/ @lukaszkaiser
-# /tensorflow/contrib/session_bundle/ @nfiedel @sukritiramesh
-# /tensorflow/contrib/slim/ @sguada @thenbasilmanran
-# /tensorflow/contrib/stateless/ @girving
-# /tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst @yupbank
-# /tensorflow/contrib/testing/ @dandelionmane
-# /tensorflow/contrib/timeseries/ @allenlavoie
-# /tensorflow/contrib/tpu/ @frankchn @saeta @jhseu
-# /tensorflow/contrib/training/ @joel-shor @ebrevdo
-# /tensorflow/contrib/util/ @sherrym
+/tensorflow/contrib/hadoop @yongtang
+/tensorflow/contrib/hvx/ @satok16
+/tensorflow/contrib/integrate/ @shoyer
+/tensorflow/contrib/kafka @yongtang
+/tensorflow/contrib/kernel_methods/ @petrosmol
+/tensorflow/contrib/kinesis @yongtang
+/tensorflow/contrib/ios_examples/ @petewarden
+/tensorflow/contrib/labeled_tensor/ @shoyer
+/tensorflow/contrib/layers/ @fchollet @martinwicke
+/tensorflow/contrib/learn/ @martinwicke @ispirmustafa @alextp
+/tensorflow/contrib/linear_optimizer/ @petrosmol @andreasst @katsiapis
+/tensorflow/contrib/lookup/ @ysuematsu @andreasst
+/tensorflow/contrib/losses/ @alextp @ispirmustafa
+/tensorflow/contrib/makefile/ @petewarden @satok16 @wolffg
+/tensorflow/contrib/metrics/ @alextp @honkentuber @ispirmustafa
+/tensorflow/contrib/nccl/ @cwhipkey @zheng-xq
+/tensorflow/contrib/opt/ @strategist333 @alextp
+/tensorflow/contrib/pi_examples/ @maciekcc
+/tensorflow/contrib/quantization/ @petewarden
+/tensorflow/contrib/rnn/ @ebrevdo @scottzhu
+/tensorflow/contrib/saved_model/ @nfiedel @sukritiramesh @allenl
+/tensorflow/contrib/seq2seq/ @ebrevdo @lmthang
+/tensorflow/contrib/session_bundle/ @nfiedel @sukritiramesh
+/tensorflow/contrib/slim/ @sguada @thenbasilmanran
+/tensorflow/contrib/stateless/ @girving @alextp
+/tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst @yupbank
+/tensorflow/contrib/tensorrt/ @aaroey
+# NEED OWNER: /tensorflow/contrib/testing/
+/tensorflow/contrib/timeseries/ @allenlavoie
+/tensorflow/contrib/tpu/ @frankchn @saeta @jhseu @sourabhbajaj
+/tensorflow/contrib/training/ @joel-shor @ebrevdo
+/tensorflow/contrib/util/ @sherrym
+
+/third_party/systemlibs/ @perfinion
diff --git a/README.md b/README.md
index 82de010dd445c57c3fcc566db53e18db025c1f9e..57efb876c9afaf9fe76c4ced4e6a1572e9241edf 100644
--- a/README.md
+++ b/README.md
@@ -22,12 +22,14 @@ organization for the purposes of conducting machine learning and deep neural
networks research. The system is general enough to be applicable in a wide
variety of other domains, as well.
+TensorFlow provides stable Python and C APIs, as well as APIs without a backwards-compatibility guarantee for other languages such as C++, Go, Java, JavaScript and Swift.
+
Keep up to date with release announcements and security updates by
subscribing to
[announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce).
## Installation
-*See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.*
+*See [Installing TensorFlow](https://www.tensorflow.org/install) for instructions on how to install our release binaries or how to build from source.*
People who are a little more adventurous can also try our nightly binaries:
@@ -46,15 +48,12 @@ $ python
```
```python
>>> import tensorflow as tf
+>>> tf.enable_eager_execution()
+>>> tf.add(1, 2)
+3
>>> hello = tf.constant('Hello, TensorFlow!')
->>> sess = tf.Session()
->>> sess.run(hello)
+>>> hello.numpy()
'Hello, TensorFlow!'
->>> a = tf.constant(10)
->>> b = tf.constant(32)
->>> sess.run(a + b)
-42
->>> sess.close()
```
Learn more examples about how to do specific tasks in TensorFlow at the [tutorials page of tensorflow.org](https://www.tensorflow.org/tutorials/).
@@ -81,13 +80,15 @@ The TensorFlow project strives to abide by generally accepted best practices in
| Build Type | Status | Artifacts |
| --- | --- | --- |
-| **Linux CPU** |  | [pypi](https://pypi.org/project/tf-nightly/) |
-| **Linux GPU** |  | [pypi](https://pypi.org/project/tf-nightly-gpu/) |
-| **Linux XLA** |  | TBA |
-| **MacOS** |  | [pypi](https://pypi.org/project/tf-nightly/) |
-| **Windows CPU** |  | [pypi](https://pypi.org/project/tf-nightly/) |
-| **Windows GPU** |  | [pypi](https://pypi.org/project/tf-nightly-gpu/) |
-| **Android** |  | [](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) |
+| **Linux CPU** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-cc.html) | [pypi](https://pypi.org/project/tf-nightly/) |
+| **Linux GPU** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-gpu-py3.html) | [pypi](https://pypi.org/project/tf-nightly-gpu/) |
+| **Linux XLA** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/ubuntu-xla.html) | TBA |
+| **MacOS** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/macos-py2-cc.html) | [pypi](https://pypi.org/project/tf-nightly/) |
+| **Windows CPU** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-cpu.html) | [pypi](https://pypi.org/project/tf-nightly/) |
+| **Windows GPU** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-gpu.html) | [pypi](https://pypi.org/project/tf-nightly-gpu/) |
+| **Android** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/android.html) | [](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) |
+| **Raspberry Pi 0 and 1** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi01-py2.html) [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi01-py3.html) | [Py2](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp27-none-linux_armv6l.whl) [Py3](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp34-none-linux_armv6l.whl) |
+| **Raspberry Pi 2 and 3** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi23-py2.html) [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi23-py3.html) | [Py2](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp27-none-linux_armv7l.whl) [Py3](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp34-none-linux_armv7l.whl) |
### Community Supported Builds
@@ -97,17 +98,20 @@ The TensorFlow project strives to abide by generally accepted best practices in
| **IBM s390x** | [](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA |
| **IBM ppc64le CPU** | [](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA |
| **IBM ppc64le GPU** | [](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/) | TBA |
-| **Linux CPU with Intel® MKL-DNN®** | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | TBA |
+| **Linux CPU with Intel® MKL-DNN** Nightly | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) |
+| **Linux CPU with Intel® MKL-DNN** Python 2.7<br> **Linux CPU with Intel® MKL-DNN** Python 3.5<br> **Linux CPU with Intel® MKL-DNN** Python 3.6 | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild)|[1.10.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp27-cp27mu-linux_x86_64.whl)<br>[1.10.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp35-cp35m-linux_x86_64.whl)<br>[1.10.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp36-cp36m-linux_x86_64.whl) |
## For more information
-
* [TensorFlow Website](https://www.tensorflow.org)
-* [TensorFlow White Papers](https://www.tensorflow.org/about/bib)
-* [TensorFlow YouTube Channel](https://www.youtube.com/channel/UC0rqucBdTuFTjJiefW5t-IQ)
+* [TensorFlow Tutorials](https://www.tensorflow.org/tutorials/)
* [TensorFlow Model Zoo](https://github.com/tensorflow/models)
-* [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730)
+* [TensorFlow Twitter](https://twitter.com/tensorflow)
+* [TensorFlow Blog](https://medium.com/tensorflow)
* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si)
+* [TensorFlow Roadmap](https://www.tensorflow.org/community/roadmap)
+* [TensorFlow White Papers](https://www.tensorflow.org/about/bib)
+* [TensorFlow YouTube Channel](https://www.youtube.com/channel/UC0rqucBdTuFTjJiefW5t-IQ)
Learn more about the TensorFlow community at the [community page of tensorflow.org](https://www.tensorflow.org/community) for a few ways to participate.
diff --git a/RELEASE.md b/RELEASE.md
index 078aafd3746e5ce5c16af15de80d99c1a9e8c567..20e1d9217b7684e696d0abf427eef9ab9548d1b7 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,9 +1,92 @@
+# Release 1.11.0
+
+## Major Features and Improvements
+
+* Nvidia GPU:
+ * Prebuilt binaries are now (as of TensorFlow 1.11) built against cuDNN 7.2 and TensorRT 4. See updated install guides: [Installing TensorFlow on Ubuntu](https://www.tensorflow.org/install/install_linux#tensorflow_gpu_support)
+* Google Cloud TPU:
+ * Experimental tf.data integration for Keras on Google Cloud TPUs.
+ * Experimental / preview support for eager execution on Google Cloud TPUs.
+* DistributionStrategy:
+ * Add multi-GPU DistributionStrategy support in tf.keras. Users can now use `fit`, `evaluate` and `predict` to distribute their model on multiple GPUs.
+ * Add multi-worker DistributionStrategy and standalone client support in Estimator. See [README](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/distribute) for more details.
+* Add C, C++, and Python functions for querying kernels
+
+## Breaking Changes
+
+* Keras:
+ * The default values for tf.keras `RandomUniform`, `RandomNormal`, and `TruncatedNormal` initializers have been changed to match those in external Keras.
+ * Breaking change: `model.get_config()` on a Sequential model now returns a config dictionary (consistent with other Model instances) instead of a list of configs for the underlying layers.
+
+## Bug Fixes and Other Changes
+
+* C++:
+ * Changed the signature of SessionFactory::NewSession so that it can return a meaningful error message on failure.
+* tf.data:
+ * Remove `num_parallel_parser_calls` argument from `tf.contrib.data.make_csv_dataset()`.
+ * `tf.data.Dataset.list_files()` raises an exception at initialization time if the argument matches no files.
+ * Renamed BigTable class to BigtableTable for clarity
+ * Document use of the Cloud Bigtable API
+ * Adding `tf.contrib.data.reduce_dataset` which can be used to reduce a dataset to a single element.
+ * Generalization of `tf.contrib.data.sliding_window_batch`.
+* INC:
+ * Runtime improvements to triangular solve.
+* `tf.contrib`:
+ * Add an `implementation` argument to `tf.keras.layers.LocallyConnected2D` and `tf.keras.layers.LocallyConnected1D`. The new mode (`implementation=2`) performs forward pass as a single dense matrix multiplication, allowing dramatic speedups in certain scenarios (but worse performance in others - see docstring). The option also allows using `padding=same`.
+ * Add documentation clarifying the differences between tf.fill and tf.constant.
+ * Add experimental IndexedDatasets.
+ * Add selective registration target using the lite proto runtime.
+ * Add simple Tensor and DataType classes to TensorFlow Lite Java
+ * Add support for bitcasting to/from uint32 and uint64.
+ * Added a subclass of Estimator that can be created from a SavedModel (SavedModelEstimator).
+ * Adds leaf index modes as an argument.
+ * Allow a different output shape from the input in tf.contrib.image.transform.
+ * Change the state_size order of the StackedRNNCell to be natural order. To keep the existing behavior, user can add reverse_state_order=True when constructing the StackedRNNCells.
+ * Deprecate self.test_session() in favor of self.session() or self.cached_session().
+ * Directly import tensor.proto.h (the transitive import will be removed from tensor.h soon)
+ * Estimator.train() now supports tf.contrib.summary.\* summaries out of the box; each call to .train() will now create a separate tfevents file rather than re-using a shared one.
+ * Fix FTRL L2-shrinkage behavior: the gradient from the L2 shrinkage term should not end up in the accumulator.
+ * Fix toco compilation/execution on Windows
+ * GoogleZoneProvider class added to detect which Google Cloud Engine zone tensorflow is running in.
+ * It is now safe to call any of the C API's TF_Delete\* functions on nullptr
+ * Log some errors on Android to logcat
+ * Match FakeQuant numerics in TFLite to improve accuracy of TFLite quantized inference models.
+ * Optional bucket location check for the GCS Filesystem.
+ * Performance enhancements for StringSplitOp & StringSplitV2Op.
+ * Performance improvements for regex replace operations.
+ * TFRecordWriter now raises an error if .write() fails.
+ * TPU: More helpful error messages in TPUClusterResolvers.
+ * The legacy_init_op argument to SavedModelBuilder methods for adding MetaGraphs has been deprecated. Please use the equivalent main_op argument instead. As part of this, we now explicitly check for a single main_op or legacy_init_op at the time of SavedModel building, whereas the check on main_op was previously only done at load time.
+ * The protocol used for Estimator training is now configurable in RunConfig.
+ * Triangular solve performance improvements.
+ * Unify RNN cell interface between TF and Keras. Add new get_initial_state() to Keras and TF RNN cell, which will be used to replace the existing zero_state() method.
+ * Update initialization of variables in Keras.
+ * Updates to "constrained_optimization" in tensorflow/contrib.
+ * boosted trees: adding pruning mode
+ * tf.train.Checkpoint does not delete old checkpoints by default.
+ * tfdbg: Limit the total disk space occupied by dumped tensor data to 100 GBytes. Add environment variable `TFDBG_DISK_BYTES_LIMIT` to allow adjustment of this upper limit.
+
+## Thanks to our Contributors
+
+This release contains contributions from many people at Google, as well as:
+
+Aapeli, adoda, Ag Ramesh, Amogh Mannekote, Andrew Gibiansky, Andy Craze, Anirudh Koul, Aurelien Geron, Avijit, Avijit-Nervana, Ben, Benjamin H. Myara, bhack, Brett Koonce, Cao Zongyan, cbockman, cheerss, Chikanaga Tomoyuki, Clayne Robison, cosine0, Cui Wei, Dan J, David, David Norman, Dmitry Klimenkov, Eliel Hojman, Florian Courtial, fo40225, formath, Geoffrey Irving, gracehoney, Grzegorz Pawelczak, Guoliang Hua, Guozhong Zhuang, Herman Zvonimir Došilović, HuiyangFei, Jacker, Jan Hünnemeyer, Jason Taylor, Jason Zaman, Jesse, Jiang,Zhoulong, Jiawei Zhang, Jie, Joe Yearsley, Johannes Schmitz, Jon Perl, Jon Triebenbach, Jonathan, Jonathan Hseu, Jongmin Park, Justin Shenk, karl@kubx.ca, Kate Hodesdon, Kb Sriram, Keishi Hattori, Kenneth Blomqvist, Koan-Sin Tan, Li Liangbin, Li, Yiqiang, Loo Rong Jie, Madiyar, Mahmoud Abuzaina, Mark Ryan, Matt Dodge, mbhuiyan, melvinljy96, Miguel Mota, Nafis Sadat, Nathan Luehr, naurril, Nehal J Wani, Niall Moran, Niranjan Hasabnis, Nishidha Panpaliya, npow, olicht, Pei Zhang, Peng Wang (Simpeng), Peng Yu, Philipp Jund, Pradeep Banavara, Pratik Kalshetti, qwertWZ, Rakesh Chada, Randy West, Ray Kim, Rholais Lii, Robin Richtsfeld, Rodrigo Silveira, Ruizhi, Santosh Kumar, Seb Bro, Sergei Lebedev, sfujiwara, Shaba Abhiram, Shashi, SneakyFish5, Soila Kavulya, Stefan Dyulgerov, Steven Winston, Sunitha Kambhampati, Surry Shome, Taehoon Lee, Thor Johnsen, Tristan Rice, TShapinsky, tucan, tucan9389, Vicente Reyes, Vilmar-Hillow, Vitaly Lavrukhin, wangershi, weidan.kong, weidankong, Wen-Heng (Jack) Chung, William D. Irons, Wim Glenn, XFeiF, Yan Facai (颜发才), Yanbo Liang, Yong Tang, Yoshihiro Yamazaki, Yuan (Terry) Tang, Yuan, Man, zhaoyongke,
+Áron Ricardo Perez-Lopez, 张天启, 张晓飞
+
+
+# Release 1.10.1
+## Bug Fixes and Other Changes
+
+* `tf.keras`:
+ * Fixing keras on Cloud TPUs. No new binaries will be built for Windows.
+
+
# Release 1.10.0
## Major Features And Improvements
* The `tf.lite` runtime now supports `complex64`.
-* Initial Bigtable integration for `tf.data`.
+* Initial [Google Cloud Bigtable integration](https://github.com/tensorflow/tensorflow/tree/r1.10/tensorflow/contrib/bigtable) for `tf.data`.
* Improved local run behavior in `tf.estimator.train_and_evaluate` which does not reload checkpoints for evaluation.
* `RunConfig` now sets device_filters to restrict how workers and PS can communicate. This can speed up training and ensure clean shutdowns in some situations. But if you have jobs that require communication between workers, you will have to set custom session_options in your `RunConfig`.
* Moved Distributions and Bijectors from `tf.contrib.distributions` to [Tensorflow Probability (TFP)](https://github.com/tensorflow/probability). `tf.contrib.distributions` is now deprecated and will be removed by the end of 2018.
@@ -11,7 +94,7 @@
## Breaking Changes
-* Prebuilt binaries are now (as of TensorFlow 1.10) built against NCCL 2.2 and no longer include NCCL in the binary install. TensorFlow usage with multiple GPUs and NCCL requires upgrade to [NCCL 2.2](https://developer.nvidia.com/nccl). See updated install guides: [Installing TensorFlow on Ubuntu](https://www.tensorflow.org/install/install_linux#tensorflow_gpu_support) and [Install TensorFlow from Sources](https://www.tensorflow.org/install/install_sources#optional_install_tensorflow_for_gpu_prerequisites).
+* Prebuilt binaries are now (as of TensorFlow 1.10) built against NCCL 2.2 and no longer include NCCL in the binary install. TensorFlow usage with multiple GPUs and NCCL requires upgrade to [NCCL 2.2](https://developer.nvidia.com/nccl). See updated install guides: [TensorFlow GPU support](https://www.tensorflow.org/install/gpu) and [Build TensorFlow from source](https://www.tensorflow.org/install/source).
* Starting from TensorFlow 1.11, Windows builds will use Bazel. Therefore, we will drop official support for cmake.
## Bug Fixes and Other Changes
@@ -19,7 +102,7 @@
* `tf.data`:
* `tf.contrib.data.group_by_reducer()` is now available via the public API.
* `tf.contrib.data.choose_from_datasets()` is now available via the public API.
- * Adding `drop_remainder` argument to `tf.data.Dataset.batch()` and `tf.data.Dataset.padded_batch()`, deprecating tf.contrib.data.batch_and_drop_remainder()` and `tf.contrib.data.padded_batch_and_drop_remainder()`.
+ * Adding `drop_remainder` argument to `tf.data.Dataset.batch()` and `tf.data.Dataset.padded_batch()`, deprecating `tf.contrib.data.batch_and_drop_remainder()` and `tf.contrib.data.padded_batch_and_drop_remainder()`.
* `tf.estimator`:
* `Estimator`s now use custom savers included in `EstimatorSpec` scaffolds for saving SavedModels during export.
* `EstimatorSpec` will now add a default prediction output for export if no `export_output` is provided, eliminating the need to explicitly include a `PredictOutput` object in the `model_fn` for simple use-cases.
diff --git a/configure.py b/configure.py
index f97bf8a66836a6647ba6aca625cb1526e11b39af..3fcaaa9d0ef51c57fb40fcafd8579977f37375ef 100644
--- a/configure.py
+++ b/configure.py
@@ -41,11 +41,10 @@ _DEFAULT_CUDA_PATH = '/usr/local/cuda'
_DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
_DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing '
'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION)
-_DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/%s-linux-gnu' % platform.machine()
_TF_OPENCL_VERSION = '1.2'
_DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
-_SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15]
+_SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15, 16]
_DEFAULT_PROMPT_ASK_ATTEMPTS = 10
@@ -54,6 +53,11 @@ _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc'
_TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
_TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE')
+if platform.machine() == 'ppc64le':
+ _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/powerpc64le-linux-gnu/'
+else:
+ _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/%s-linux-gnu' % platform.machine()
+
class UserInputError(Exception):
pass
@@ -153,14 +157,18 @@ def get_python_path(environ_cp, python_bin_path):
if environ_cp.get('PYTHONPATH'):
python_paths = environ_cp.get('PYTHONPATH').split(':')
try:
- library_paths = run_shell(
- [python_bin_path, '-c',
- 'import site; print("\\n".join(site.getsitepackages()))']).split('\n')
+ library_paths = run_shell([
+ python_bin_path, '-c',
+ 'import site; print("\\n".join(site.getsitepackages()))'
+ ]).split('\n')
except subprocess.CalledProcessError:
- library_paths = [run_shell(
- [python_bin_path, '-c',
- 'from distutils.sysconfig import get_python_lib;'
- 'print(get_python_lib())'])]
+ library_paths = [
+ run_shell([
+ python_bin_path, '-c',
+ 'from distutils.sysconfig import get_python_lib;'
+ 'print(get_python_lib())'
+ ])
+ ]
all_paths = set(python_paths + library_paths)
@@ -187,8 +195,7 @@ def setup_python(environ_cp):
environ_cp, 'PYTHON_BIN_PATH', ask_python_bin_path,
default_python_bin_path)
# Check if the path is valid
- if os.path.isfile(python_bin_path) and os.access(
- python_bin_path, os.X_OK):
+ if os.path.isfile(python_bin_path) and os.access(python_bin_path, os.X_OK):
break
elif not os.path.exists(python_bin_path):
print('Invalid python path: %s cannot be found.' % python_bin_path)
@@ -230,8 +237,9 @@ def setup_python(environ_cp):
environ_cp['PYTHON_BIN_PATH'] = python_bin_path
# Write tools/python_bin_path.sh
- with open(os.path.join(
- _TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), 'w') as f:
+ with open(
+ os.path.join(_TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'),
+ 'w') as f:
f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path)
@@ -250,7 +258,7 @@ def reset_tf_configure_bazelrc(workspace_path):
continue
f.write('%s\n' % l)
if is_windows():
- tf_bazelrc_path = _TF_BAZELRC.replace("\\", "/")
+ tf_bazelrc_path = _TF_BAZELRC.replace('\\', '/')
else:
tf_bazelrc_path = _TF_BAZELRC
f.write('import %s\n' % tf_bazelrc_path)
@@ -261,8 +269,8 @@ def cleanup_makefile():
These files could interfere with Bazel parsing.
"""
- makefile_download_dir = os.path.join(
- _TF_WORKSPACE_ROOT, 'tensorflow', 'contrib', 'makefile', 'downloads')
+ makefile_download_dir = os.path.join(_TF_WORKSPACE_ROOT, 'tensorflow',
+ 'contrib', 'makefile', 'downloads')
if os.path.isdir(makefile_download_dir):
for root, _, filenames in os.walk(makefile_download_dir):
for f in filenames:
@@ -330,9 +338,8 @@ def get_var(environ_cp,
'Environment variable %s must be set as a boolean indicator.\n'
'The following are accepted as TRUE : %s.\n'
'The following are accepted as FALSE: %s.\n'
- 'Current value is %s.' % (
- var_name, ', '.join(true_strings), ', '.join(false_strings),
- var))
+ 'Current value is %s.' % (var_name, ', '.join(true_strings),
+ ', '.join(false_strings), var))
while var is None:
user_input_origin = get_input(question)
@@ -355,8 +362,12 @@ def get_var(environ_cp,
return var
-def set_build_var(environ_cp, var_name, query_item, option_name,
- enabled_by_default, bazel_config_name=None):
+def set_build_var(environ_cp,
+ var_name,
+ query_item,
+ option_name,
+ enabled_by_default,
+ bazel_config_name=None):
"""Set if query_item will be enabled for the build.
Ask user if query_item will be enabled. Default is used if no input is given.
@@ -379,8 +390,8 @@ def set_build_var(environ_cp, var_name, query_item, option_name,
elif bazel_config_name is not None:
# TODO(mikecase): Migrate all users of configure.py to use --config Bazel
# options and not to set build configs through environment variables.
- write_to_bazelrc('build:%s --define %s=true'
- % (bazel_config_name, option_name))
+ write_to_bazelrc(
+ 'build:%s --define %s=true' % (bazel_config_name, option_name))
def set_action_env_var(environ_cp,
@@ -447,7 +458,8 @@ def check_bazel_version(min_version):
if which('bazel') is None:
print('Cannot find bazel. Please install bazel.')
sys.exit(0)
- curr_version = run_shell(['bazel', '--batch', '--bazelrc=/dev/null', 'version'])
+ curr_version = run_shell(
+ ['bazel', '--batch', '--bazelrc=/dev/null', 'version'])
for line in curr_version.split('\n'):
if 'Build label: ' in line:
@@ -499,6 +511,7 @@ def set_cc_opt_flags(environ_cp):
write_to_bazelrc('build:opt --host_copt=-march=native')
write_to_bazelrc('build:opt --define with_default_optimizations=true')
+
def set_tf_cuda_clang(environ_cp):
"""set TF_CUDA_CLANG action_env.
@@ -581,16 +594,14 @@ def set_clang_cuda_compiler_path(environ_cp):
clang_cuda_compiler_path)
-def prompt_loop_or_load_from_env(
- environ_cp,
- var_name,
- var_default,
- ask_for_var,
- check_success,
- error_msg,
- suppress_default_error=False,
- n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS
-):
+def prompt_loop_or_load_from_env(environ_cp,
+ var_name,
+ var_default,
+ ask_for_var,
+ check_success,
+ error_msg,
+ suppress_default_error=False,
+ n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS):
"""Loop over user prompts for an ENV param until receiving a valid response.
For the env param var_name, read from the environment or verify user input
@@ -629,9 +640,7 @@ def prompt_loop_or_load_from_env(
)
for _ in range(n_ask_attempts):
- val = get_from_env_or_user_or_default(environ_cp,
- var_name,
- full_query,
+ val = get_from_env_or_user_or_default(environ_cp, var_name, full_query,
default)
if check_success(val):
break
@@ -639,9 +648,9 @@ def prompt_loop_or_load_from_env(
print(error_msg % val)
environ_cp[var_name] = ''
else:
- raise UserInputError('Invalid %s setting was provided %d times in a row. '
- 'Assuming to be a scripting mistake.' %
- (var_name, n_ask_attempts))
+ raise UserInputError(
+ 'Invalid %s setting was provided %d times in a row. '
+ 'Assuming to be a scripting mistake.' % (var_name, n_ask_attempts))
environ_cp[var_name] = val
return val
@@ -650,8 +659,8 @@ def prompt_loop_or_load_from_env(
def create_android_ndk_rule(environ_cp):
"""Set ANDROID_NDK_HOME and write Android NDK WORKSPACE rule."""
if is_windows() or is_cygwin():
- default_ndk_path = cygpath('%s/Android/Sdk/ndk-bundle' %
- environ_cp['APPDATA'])
+ default_ndk_path = cygpath(
+ '%s/Android/Sdk/ndk-bundle' % environ_cp['APPDATA'])
elif is_macos():
default_ndk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME']
else:
@@ -668,8 +677,7 @@ def create_android_ndk_rule(environ_cp):
ask_for_var='Please specify the home path of the Android NDK to use.',
check_success=valid_ndk_path,
error_msg=('The path %s or its child file "source.properties" '
- 'does not exist.')
- )
+ 'does not exist.'))
write_action_env_to_bazelrc('ANDROID_NDK_HOME', android_ndk_home_path)
write_action_env_to_bazelrc('ANDROID_NDK_API_LEVEL',
check_ndk_level(android_ndk_home_path))
@@ -703,9 +711,9 @@ def create_android_sdk_rule(environ_cp):
api_levels = [x.replace('android-', '') for x in api_levels]
def valid_api_level(api_level):
- return os.path.exists(os.path.join(android_sdk_home_path,
- 'platforms',
- 'android-' + api_level))
+ return os.path.exists(
+ os.path.join(android_sdk_home_path, 'platforms',
+ 'android-' + api_level))
android_api_level = prompt_loop_or_load_from_env(
environ_cp,
@@ -720,9 +728,8 @@ def create_android_sdk_rule(environ_cp):
versions = sorted(os.listdir(build_tools))
def valid_build_tools(version):
- return os.path.exists(os.path.join(android_sdk_home_path,
- 'build-tools',
- version))
+ return os.path.exists(
+ os.path.join(android_sdk_home_path, 'build-tools', version))
android_build_tools_version = prompt_loop_or_load_from_env(
environ_cp,
@@ -736,10 +743,8 @@ def create_android_sdk_rule(environ_cp):
write_action_env_to_bazelrc('ANDROID_BUILD_TOOLS_VERSION',
android_build_tools_version)
- write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL',
- android_api_level)
- write_action_env_to_bazelrc('ANDROID_SDK_HOME',
- android_sdk_home_path)
+ write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL', android_api_level)
+ write_action_env_to_bazelrc('ANDROID_SDK_HOME', android_sdk_home_path)
def check_ndk_level(android_ndk_home_path):
@@ -798,6 +803,7 @@ def reformat_version_sequence(version_str, sequence_count):
Args:
version_str: String, the version string.
sequence_count: int, an integer.
+
Returns:
string, reformatted version string.
"""
@@ -839,19 +845,27 @@ def set_tf_cuda_version(environ_cp):
cuda_toolkit_path = cygpath(cuda_toolkit_path)
if is_windows():
- cuda_rt_lib_path = 'lib/x64/cudart.lib'
+ cuda_rt_lib_paths = ['lib/x64/cudart.lib']
elif is_linux():
- cuda_rt_lib_path = 'lib64/libcudart.so.%s' % tf_cuda_version
+ cuda_rt_lib_paths = [
+ '%s/libcudart.so.%s' % (x, tf_cuda_version) for x in [
+ 'lib64',
+ 'lib/powerpc64le-linux-gnu',
+ 'lib/x86_64-linux-gnu',
+ ]
+ ]
elif is_macos():
- cuda_rt_lib_path = 'lib/libcudart.%s.dylib' % tf_cuda_version
+ cuda_rt_lib_paths = ['lib/libcudart.%s.dylib' % tf_cuda_version]
- cuda_toolkit_path_full = os.path.join(cuda_toolkit_path, cuda_rt_lib_path)
- if os.path.exists(cuda_toolkit_path_full):
+ cuda_toolkit_paths_full = [
+ os.path.join(cuda_toolkit_path, x) for x in cuda_rt_lib_paths
+ ]
+ if any([os.path.exists(x) for x in cuda_toolkit_paths_full]):
break
# Reset and retry
print('Invalid path to CUDA %s toolkit. %s cannot be found' %
- (tf_cuda_version, cuda_toolkit_path_full))
+ (tf_cuda_version, cuda_toolkit_paths_full))
environ_cp['TF_CUDA_VERSION'] = ''
environ_cp['CUDA_TOOLKIT_PATH'] = ''
@@ -918,8 +932,8 @@ def set_tf_cudnn_version(environ_cp):
cudnn_path_from_ldconfig)
if cudnn_path_from_ldconfig:
cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1)
- if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig,
- tf_cudnn_version)):
+ if os.path.exists(
+ '%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)):
cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig)
break
@@ -1165,6 +1179,7 @@ def get_native_cuda_compute_capabilities(environ_cp):
Args:
environ_cp: copy of the os.environ.
+
Returns:
string of native cuda compute capabilities, separated by comma.
"""
@@ -1289,8 +1304,7 @@ def set_computecpp_toolkit_path(environ_cp):
else:
sycl_rt_lib_path = ''
- sycl_rt_lib_path_full = os.path.join(toolkit_path,
- sycl_rt_lib_path)
+ sycl_rt_lib_path_full = os.path.join(toolkit_path, sycl_rt_lib_path)
exists = os.path.exists(sycl_rt_lib_path_full)
if not exists:
print('Invalid SYCL %s library path. %s cannot be found' %
@@ -1318,8 +1332,8 @@ def set_trisycl_include_dir(environ_cp):
ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
'include directory. (Use --config=sycl_trisycl '
'when building with Bazel) '
- '[Default is %s]: '
- ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
+ '[Default is %s]: ') % (
+ _DEFAULT_TRISYCL_INCLUDE_DIR)
while True:
trisycl_include_dir = get_from_env_or_user_or_default(
@@ -1328,13 +1342,12 @@ def set_trisycl_include_dir(environ_cp):
if os.path.exists(trisycl_include_dir):
break
- print('Invalid triSYCL include directory, %s cannot be found'
- % (trisycl_include_dir))
+ print('Invalid triSYCL include directory, %s cannot be found' %
+ (trisycl_include_dir))
# Set TRISYCL_INCLUDE_DIR
environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
- write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
- trisycl_include_dir)
+ write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
def set_mpi_home(environ_cp):
@@ -1344,8 +1357,9 @@ def set_mpi_home(environ_cp):
default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home))
def valid_mpi_path(mpi_home):
- exists = (os.path.exists(os.path.join(mpi_home, 'include')) and
- os.path.exists(os.path.join(mpi_home, 'lib')))
+ exists = (
+ os.path.exists(os.path.join(mpi_home, 'include')) and
+ os.path.exists(os.path.join(mpi_home, 'lib')))
if not exists:
print('Invalid path to the MPI Toolkit. %s or %s cannot be found' %
(os.path.join(mpi_home, 'include'),
@@ -1394,12 +1408,21 @@ def set_other_mpi_vars(environ_cp):
raise ValueError('Cannot find the MPI library file in %s/lib' % mpi_home)
-def set_grpc_build_flags():
- write_to_bazelrc('build --define grpc_no_ares=true')
-
+def set_system_libs_flag(environ_cp):
+ syslibs = environ_cp.get('TF_SYSTEM_LIBS', '')
+ if syslibs and syslibs != '':
+ if ',' in syslibs:
+ syslibs = ','.join(sorted(syslibs.split(',')))
+ else:
+ syslibs = ','.join(sorted(syslibs.split()))
+ write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs)
-def set_build_strip_flag():
- write_to_bazelrc('build --strip=always')
+ if 'PREFIX' in environ_cp:
+ write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX'])
+ if 'LIBDIR' in environ_cp:
+ write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR'])
+ if 'INCLUDEDIR' in environ_cp:
+ write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
def set_windows_build_flags(environ_cp):
@@ -1417,14 +1440,20 @@ def set_windows_build_flags(environ_cp):
# TODO(pcloudy): Remove this flag when upgrading Bazel to 0.16.0
# Short object file path will be enabled by default.
write_to_bazelrc('build --experimental_shortened_obj_file_path=true')
+ # When building zip file for some py_binary and py_test targets, don't
+ # include its dependencies. This is for:
+ # 1. Running python tests against the system installed TF pip package.
+ # 2. Avoiding redundant files in
+ # //tensorflow/tools/pip_package:simple_console_windows,
+ # which is a py_binary used during creating TF pip package.
+ # See https://github.com/tensorflow/tensorflow/issues/22390
+ write_to_bazelrc('build --define=no_tensorflow_py_deps=true')
if get_var(
environ_cp, 'TF_OVERRIDE_EIGEN_STRONG_INLINE', 'Eigen strong inline',
- True,
- ('Would you like to override eigen strong inline for some C++ '
- 'compilation to reduce the compilation time?'),
- 'Eigen strong inline overridden.',
- 'Not overriding eigen strong inline, '
+ True, ('Would you like to override eigen strong inline for some C++ '
+ 'compilation to reduce the compilation time?'),
+ 'Eigen strong inline overridden.', 'Not overriding eigen strong inline, '
'some compilations could take more than 20 mins.'):
# Due to a known MSVC compiler issue
# https://github.com/tensorflow/tensorflow/issues/10521
@@ -1441,10 +1470,11 @@ def config_info_line(name, help_text):
def main():
parser = argparse.ArgumentParser()
- parser.add_argument("--workspace",
- type=str,
- default=_TF_WORKSPACE_ROOT,
- help="The absolute path to your active Bazel workspace.")
+ parser.add_argument(
+ '--workspace',
+ type=str,
+ default=_TF_WORKSPACE_ROOT,
+ help='The absolute path to your active Bazel workspace.')
args = parser.parse_args()
# Make a copy of os.environ to be clear when functions and getting and setting
@@ -1472,8 +1502,6 @@ def main():
# Windows.
environ_cp['TF_DOWNLOAD_CLANG'] = '0'
environ_cp['TF_ENABLE_XLA'] = '0'
- environ_cp['TF_NEED_GDR'] = '0'
- environ_cp['TF_NEED_VERBS'] = '0'
environ_cp['TF_NEED_MPI'] = '0'
environ_cp['TF_SET_ANDROID_WORKSPACE'] = '0'
@@ -1486,7 +1514,7 @@ def main():
# runtime to allow the Tensorflow testcases which compare numpy
# results to Tensorflow results to succeed.
if is_ppc64le():
- write_action_env_to_bazelrc("OMP_NUM_THREADS", 1)
+ write_action_env_to_bazelrc('OMP_NUM_THREADS', 1)
set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc',
'with_jemalloc', True)
@@ -1500,10 +1528,7 @@ def main():
'with_kafka_support', True, 'kafka')
set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
False, 'xla')
- set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support',
- False, 'gdr')
- set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support',
- False, 'verbs')
+
set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False)
if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
@@ -1537,6 +1562,10 @@ def main():
if environ_cp.get('TF_DOWNLOAD_CLANG') != '1':
# Set up which clang we should use as the cuda / host compiler.
set_clang_cuda_compiler_path(environ_cp)
+ else:
+ # Use downloaded LLD for linking.
+ write_to_bazelrc('build:cuda_clang --config=download_clang_use_lld')
+ write_to_bazelrc('test:cuda_clang --config=download_clang_use_lld')
else:
# Set up which gcc nvcc should use as the host compiler
# No need to set this on Windows
@@ -1556,19 +1585,18 @@ def main():
set_mpi_home(environ_cp)
set_other_mpi_vars(environ_cp)
- set_grpc_build_flags()
set_cc_opt_flags(environ_cp)
- set_build_strip_flag()
+ set_system_libs_flag(environ_cp)
if is_windows():
set_windows_build_flags(environ_cp)
- if get_var(
- environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace',
- False,
- ('Would you like to interactively configure ./WORKSPACE for '
- 'Android builds?'),
- 'Searching for NDK and SDK installations.',
- 'Not configuring the WORKSPACE for Android builds.'):
+ # Add a config option to build TensorFlow 2.0 API.
+ write_to_bazelrc('build:v2 --define=tf_api_version=2')
+
+ if get_var(environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace', False,
+ ('Would you like to interactively configure ./WORKSPACE for '
+ 'Android builds?'), 'Searching for NDK and SDK installations.',
+ 'Not configuring the WORKSPACE for Android builds.'):
create_android_ndk_rule(environ_cp)
create_android_sdk_rule(environ_cp)
@@ -1581,6 +1609,11 @@ def main():
'more details.')
config_info_line('mkl', 'Build with MKL support.')
config_info_line('monolithic', 'Config for mostly static monolithic build.')
+ config_info_line('gdr', 'Build with GDR support.')
+ config_info_line('verbs', 'Build with libverbs support.')
+ config_info_line('ngraph', 'Build with Intel ngraph support.')
+
if __name__ == '__main__':
main()
+
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index e13a5cf802ece5fd53c1ca2db931a548aa7fe451..3610eea42a58ab74940e059736dd692713d001f1 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -12,6 +12,7 @@ exports_files([
# The leakr files are used by //third_party/cloud_tpu.
"leakr_badwords.dic",
"leakr_badfiles.dic",
+ "leakr_file_type_recipe.ftrcp",
])
load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object")
@@ -23,6 +24,24 @@ load(
"//tensorflow/python/tools/api/generator:api_gen.bzl",
"gen_api_init_files", # @unused
)
+load("//tensorflow/python/tools/api/generator:api_gen.bzl", "get_compat_files")
+load(
+ "//tensorflow/python/tools/api/generator:api_init_files.bzl",
+ "TENSORFLOW_API_INIT_FILES", # @unused
+)
+load(
+ "//tensorflow/python/tools/api/generator:api_init_files_v1.bzl",
+ "TENSORFLOW_API_INIT_FILES_V1", # @unused
+)
+load(
+ "//third_party/ngraph:build_defs.bzl",
+ "if_ngraph",
+)
+
+# @unused
+TENSORFLOW_API_INIT_FILES_V2 = (
+ TENSORFLOW_API_INIT_FILES + get_compat_files(TENSORFLOW_API_INIT_FILES_V1, 1)
+)
# Config setting used when building for products
# which requires restricted licenses to be avoided.
@@ -123,12 +142,6 @@ config_setting(
visibility = ["//visibility:public"],
)
-config_setting(
- name = "windows_msvc",
- values = {"cpu": "x64_windows_msvc"},
- visibility = ["//visibility:public"],
-)
-
config_setting(
name = "no_tensorflow_py_deps",
define_values = {"no_tensorflow_py_deps": "true"},
@@ -387,6 +400,7 @@ config_setting(
define_values = {
"dynamic_loaded_kernels": "true",
},
+ visibility = ["//visibility:public"],
)
config_setting(
@@ -416,12 +430,28 @@ config_setting(
visibility = ["//visibility:public"],
)
+# This flag is set by the configure step when the user selects the nGraph
+# option. By default it should be false.
+config_setting(
+ name = "with_ngraph_support",
+ values = {"define": "with_ngraph_support=true"},
+ visibility = ["//visibility:public"],
+)
+
+# This flag specifies whether TensorFlow 2.0 API should be built instead
+# of 1.* API. Note that TensorFlow 2.0 API is currently under development.
+config_setting(
+ name = "api_version_2",
+ define_values = {"tf_api_version": "2"},
+)
+
package_group(
name = "internal",
packages = [
"-//third_party/tensorflow/python/estimator",
"//learning/meta_rank/...",
"//tensorflow/...",
+ "//tensorflow_estimator/...",
"//tensorflow_fold/llgtm/...",
"//third_party/py/tensor2tensor/...",
],
@@ -429,12 +459,12 @@ package_group(
load(
"//third_party/mkl:build_defs.bzl",
- "if_mkl",
+ "if_mkl_ml",
)
filegroup(
name = "intel_binary_blob",
- data = if_mkl(
+ data = if_mkl_ml(
[
"//third_party/mkl:intel_binary_blob",
],
@@ -487,7 +517,6 @@ tf_cc_shared_object(
linkopts = select({
"//tensorflow:darwin": [],
"//tensorflow:windows": [],
- "//tensorflow:windows_msvc": [],
"//conditions:default": [
"-Wl,--version-script", # This line must be directly followed by the version_script.lds file
"$(location //tensorflow:tf_framework_version_script.lds)",
@@ -529,13 +558,13 @@ tf_cc_shared_object(
"-Wl,-install_name,@rpath/libtensorflow.so",
],
"//tensorflow:windows": [],
- "//tensorflow:windows_msvc": [],
"//conditions:default": [
"-z defs",
"-Wl,--version-script", # This line must be directly followed by the version_script.lds file
"$(location //tensorflow/c:version_script.lds)",
],
}),
+ visibility = ["//visibility:public"],
deps = [
"//tensorflow/c:c_api",
"//tensorflow/c:c_api_experimental",
@@ -554,13 +583,13 @@ tf_cc_shared_object(
"$(location //tensorflow:tf_exported_symbols.lds)",
],
"//tensorflow:windows": [],
- "//tensorflow:windows_msvc": [],
"//conditions:default": [
"-z defs",
"-Wl,--version-script", # This line must be directly followed by the version_script.lds file
"$(location //tensorflow:tf_version_script.lds)",
],
}),
+ visibility = ["//visibility:public"],
deps = [
"//tensorflow:tf_exported_symbols.lds",
"//tensorflow:tf_version_script.lds",
@@ -571,7 +600,7 @@ tf_cc_shared_object(
"//tensorflow/cc:scope",
"//tensorflow/cc/profiler",
"//tensorflow/core:tensorflow",
- ],
+ ] + if_ngraph(["@ngraph_tf//:ngraph_tf"]),
)
exports_files(
@@ -581,9 +610,73 @@ exports_files(
],
)
+# Copies every file from the listed header targets into the `include` output
+# directory. Each file keeps its directory path, minus a leading
+# bazel-out...genfiles/ prefix and anything up to external/eigen_archive/.
+# Paths containing local_config_ are skipped, as are files from an external
+# repository whose name appears in the TF_SYSTEM_LIBS environment variable.
+genrule(
+ name = "install_headers",
+ srcs = [
+ "//tensorflow/c:headers",
+ "//tensorflow/c/eager:headers",
+ "//tensorflow/cc:headers",
+ "//tensorflow/core:headers",
+ ],
+ outs = ["include"],
+ cmd = """
+ mkdir $@
+ for f in $(SRCS); do
+ d="$${f%/*}"
+ d="$${d#bazel-out*genfiles/}"
+ d="$${d#*external/eigen_archive/}"
+
+ if [[ $${d} == *local_config_* ]]; then
+ continue
+ fi
+
+ if [[ $${d} == external* ]]; then
+ extname="$${d#*external/}"
+ extname="$${extname%%/*}"
+ if [[ $${TF_SYSTEM_LIBS:-} == *$${extname}* ]]; then
+ continue
+ fi
+ fi
+
+ mkdir -p "$@/$${d}"
+ cp "$${f}" "$@/$${d}/"
+ done
+ """,
+ tags = ["manual"],
+ visibility = ["//visibility:public"],
+)
+
+genrule(
+ name = "root_init_gen",
+ srcs = select({
+ "api_version_2": [":tf_python_api_gen_v2"],
+ "//conditions:default": [":tf_python_api_gen_v1"],
+ }),
+ outs = ["__init__.py"],
+ cmd = select({
+ "api_version_2": "cp $(@D)/_api/v2/__init__.py $(OUTS)",
+ "//conditions:default": "cp $(@D)/_api/v1/__init__.py $(OUTS)",
+ }),
+)
+
+gen_api_init_files(
+ name = "tf_python_api_gen_v1",
+ srcs = ["api_template.__init__.py"],
+ api_version = 1,
+ output_dir = "_api/v1/",
+ output_files = TENSORFLOW_API_INIT_FILES_V1,
+ output_package = "tensorflow._api.v1",
+ root_init_template = "api_template.__init__.py",
+)
+
gen_api_init_files(
- name = "tensorflow_python_api_gen",
+ name = "tf_python_api_gen_v2",
srcs = ["api_template.__init__.py"],
+ api_version = 2,
+ compat_api_versions = [1],
+ output_dir = "_api/v2/",
+ output_files = TENSORFLOW_API_INIT_FILES_V2,
+ output_package = "tensorflow._api.v2",
root_init_template = "api_template.__init__.py",
)
@@ -601,7 +694,10 @@ py_library(
py_library(
name = "tensorflow_py_no_contrib",
- srcs = [":tensorflow_python_api_gen"],
+ srcs = select({
+ "api_version_2": [":tf_python_api_gen_v2"],
+ "//conditions:default": [":tf_python_api_gen_v1"],
+ }) + [":root_init_gen"],
srcs_version = "PY2AND3",
visibility = ["//visibility:public"],
deps = ["//tensorflow/python:no_contrib"],
diff --git a/tensorflow/__init__.py b/tensorflow/__init__.py
index 440e9f8dbd2f4b2a2ab78eaaf26408584e7c1446..21677512b63828fa2035527ed573bf4dc4603085 100644
--- a/tensorflow/__init__.py
+++ b/tensorflow/__init__.py
@@ -28,7 +28,8 @@ contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib')
del LazyLoader
from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top
-app.flags = flags # pylint: disable=undefined-variable
+from tensorflow.python.platform import app # pylint: disable=g-import-not-at-top
+app.flags = flags
del absolute_import
del division
diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index 779f65d5b17c350833f67f07985b00e8eb561e72..2de740e145f93b151faf5c987808dbdf73fb4fd7 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -14,15 +14,16 @@
# ==============================================================================
"""Bring in all of the public TensorFlow interface into this module."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from __future__ import absolute_import as _absolute_import
+from __future__ import division as _division
+from __future__ import print_function as _print_function
+
+import os as _os
# pylint: disable=g-bad-import-order
from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import
try:
- import os # pylint: disable=g-import-not-at-top
# Add `estimator` attribute to allow access to estimator APIs via
# "tf.estimator..."
from tensorflow.python.estimator.api import estimator # pylint: disable=g-import-not-at-top
@@ -30,9 +31,8 @@ try:
# Add `estimator` to the __path__ to allow "from tensorflow.estimator..."
# style imports.
from tensorflow.python.estimator import api as estimator_api # pylint: disable=g-import-not-at-top
- __path__ += [os.path.dirname(estimator_api.__file__)]
+ __path__ += [_os.path.dirname(estimator_api.__file__)]
del estimator_api
- del os
except (ImportError, AttributeError):
print('tf.estimator package not installed.')
@@ -41,19 +41,32 @@ except (ImportError, AttributeError):
from tensorflow.python.util.lazy_loader import LazyLoader # pylint: disable=g-import-not-at-top
contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib')
del LazyLoader
+# The templated code that replaces the placeholder above sometimes
+# sets the __all__ variable. If it does, we have to be sure to add
+# "contrib".
+if '__all__' in vars():
+ vars()['__all__'].append('contrib')
from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top
app.flags = flags # pylint: disable=undefined-variable
-del absolute_import
-del division
-del print_function
+# Make sure directory containing top level submodules is in
+# the __path__ so that "from tensorflow.foo import bar" works.
+_tf_api_dir = _os.path.dirname(_os.path.dirname(app.__file__)) # pylint: disable=undefined-variable
+if _tf_api_dir not in __path__:
+ __path__.append(_tf_api_dir)
# These symbols appear because we import the python package which
# in turn imports from tensorflow.core and tensorflow.python. They
# must come from this module. So python adds these symbols for the
# resolution to succeed.
# pylint: disable=undefined-variable
-del python
-del core
+try:
+ del python
+ del core
+except NameError:
+ # Don't fail if these modules are not available.
+ # For example, this file is also used for the compat.v1 module, and the
+ # 'python' and 'core' directories are not under compat/v1.
+ pass
# pylint: enable=undefined-variable
diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 8a9301d584775cff3ae315e6fd856b00d1734248..17e2e292eb19029d279bc12a8328edadf96f1bb8 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -117,6 +117,7 @@ tf_cuda_library(
deps = [
":c_api",
":c_api_internal",
+ "//tensorflow/c/eager:c_api",
"//tensorflow/compiler/jit/legacy_flags:mark_for_compilation_pass_flags",
"//tensorflow/contrib/tpu:all_ops",
"//tensorflow/core:core_cpu",
@@ -127,6 +128,15 @@ tf_cuda_library(
],
)
+cc_library(
+ name = "c_api_headers",
+ hdrs = [
+ "c_api.h",
+ ],
+ copts = tf_copts(),
+ visibility = ["//tensorflow:__subpackages__"],
+)
+
exports_files(
[
"version_script.lds",
@@ -194,6 +204,7 @@ tf_cuda_cc_test(
"//tensorflow:darwin": ["-headerpad_max_install_names"],
"//conditions:default": [],
}),
+ tags = ["noasan"],
# We must ensure that the dependencies can be dynamically linked since
# the shared library must be able to use core:framework.
# linkstatic = tf_kernel_tests_linkstatic(),
@@ -235,6 +246,7 @@ tf_cc_test(
":c_api_experimental",
":c_test_util",
"//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
],
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 19ccb6e71d2f3021c1ce5c8905d8a72059c1cfcb..79811ceae57e0bddeb2a6f32bad7003e14e23422 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -39,6 +39,7 @@ limitations under the License.
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/partial_tensor_shape.h"
#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor.pb.h" // NOLINT
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/types.h"
@@ -202,7 +203,8 @@ TF_Tensor* TF_NewTensor(TF_DataType dtype, const int64_t* dims, int num_dims,
buf->len_ = len;
if (dtype != TF_STRING && dtype != TF_RESOURCE &&
tensorflow::DataTypeCanUseMemcpy(static_cast(dtype)) &&
- reinterpret_cast(data) % EIGEN_MAX_ALIGN_BYTES != 0) {
+ reinterpret_cast(data) % std::max(1, EIGEN_MAX_ALIGN_BYTES) !=
+ 0) {
// TF_STRING and TF_RESOURCE tensors have a different representation in
// TF_Tensor than they do in tensorflow::Tensor. So a copy here is a waste
// (any alignment requirements will be taken care of by TF_TensorToTensor
@@ -1239,7 +1241,7 @@ void TF_SetAttrTypeList(TF_OperationDescription* desc, const char* attr_name,
void TF_SetAttrFuncName(TF_OperationDescription* desc, const char* attr_name,
const char* value, size_t length) {
tensorflow::NameAttrList func_name;
- func_name.set_name(std::string(value, value + length));
+ func_name.set_name(string(value, value + length));
desc->node_builder.Attr(attr_name, func_name);
}
@@ -2064,7 +2066,7 @@ static void GraphImportGraphDefLocked(TF_Graph* graph, const GraphDef& def,
for (int i = 0; i < size; ++i) {
TensorId id = results.missing_unused_input_map_keys[i];
- tf_results->missing_unused_key_names_data.push_back(std::string(id.first));
+ tf_results->missing_unused_key_names_data.emplace_back(id.first);
tf_results->missing_unused_key_names[i] =
tf_results->missing_unused_key_names_data.back().c_str();
tf_results->missing_unused_key_indexes[i] = id.second;
diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index 69b3ffe2a1f620e346405607ecf742fb863aa644..f316e4ba6735213ba2fbbc1f8c019ad235c0df1f 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -17,11 +17,13 @@ limitations under the License.
#include "tensorflow/c/c_api_internal.h"
#include "tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.h"
+#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/platform.h"
#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/core/protobuf/tensorflow_server.pb.h"
using tensorflow::FunctionDef;
using tensorflow::Node;
@@ -79,6 +81,18 @@ TF_Buffer* TF_CreateConfig(unsigned char enable_xla_compilation,
auto* gpu_options = config.mutable_gpu_options();
gpu_options->set_allow_growth(gpu_memory_allow_growth);
+ // TODO(b/113217601): This is needed for EagerContext::runner_ to use a
+ // threadpool, so that we avoid the possibility of running the runner_ in the
+ // threadpool of GPU event mgr, as that can trigger more callbacks to be
+ // scheduled on that same threadpool, causing a deadlock in cases where the
+ // caller of event_mgr->ThenExecute() blocks on the completion of the callback
+ // (as in the case of ConstOp kernel creation on GPU, which involves copying a
+ // CPU tensor to GPU).
+ // Setting a larger thread pool does not help with the Swift caller, as we use
+ // a different TFE context for each thread of execution (for running graph
+ // functions, and their send/recvs coroutines).
+ config.set_inter_op_parallelism_threads(1);
+
TF_Buffer* ret = TF_NewBuffer();
TF_CHECK_OK(MessageToBuffer(config, ret));
return ret;
@@ -8494,3 +8508,265 @@ void TF_EnqueueNamedTensor(TF_Session* session, int tensor_id,
/*run_metadata*/ nullptr, status);
VLOG(1) << "Enqueuing is done.";
}
+
+// Parses `text_proto` as a text-format tensorflow.ServerDef and returns it
+// serialized into a newly created TF_Buffer. If parsing fails, `status` is set
+// to an Internal error that includes the input text, and nullptr is returned.
+TF_Buffer* TFE_GetServerDef(const char* text_proto, TF_Status* status) {
+ tensorflow::ServerDef server_def;
+ if (!tensorflow::protobuf::TextFormat::ParseFromString(text_proto,
+ &server_def)) {
+ status->status = tensorflow::errors::Internal(
+ "Invalid text proto for ServerDef: ", text_proto);
+ return nullptr;
+ }
+ // Reset `status` to OK before building the result.
+ status->status = tensorflow::Status();
+ TF_Buffer* ret = TF_NewBuffer();
+ // A serialization failure is treated as fatal rather than reported.
+ TF_CHECK_OK(MessageToBuffer(server_def, ret));
+ return ret;
+}
+
+// Creates a TFE context from `session`.
+//
+// The context options are populated from TF_CreateConfig with XLA disabled
+// and GPU memory growth enabled. If applying the config fails, `status`
+// carries the error and nullptr is returned; otherwise the result of
+// TFE_NewContextFromSession is returned as-is. The temporary options and
+// config buffer are released on every path.
+TFE_Context* TFE_CreateContextFromSession(TF_Session* session,
+                                          TF_Status* status) {
+  auto* opts = TFE_NewContextOptions();
+
+  // Reduce GPU memory allocation, and set appropriate config options for TFE
+  // context.
+  auto* config =
+      TF_CreateConfig(/*xla*/ false, /* gpu_memory_allow_growth */ true);
+  TFE_ContextOptionsSetConfig(opts, config->data, config->length, status);
+  if (!status->status.ok()) {
+    // `config` was dereferenced just above, so it is non-null here. Free it
+    // instead of CHECK-ing that it is null, which would abort the process
+    // rather than report the failure through `status`.
+    TF_DeleteBuffer(config);
+    TFE_DeleteContextOptions(opts);
+    return nullptr;
+  }
+
+  auto* ctx = TFE_NewContextFromSession(opts, session, status);
+  TF_DeleteBuffer(config);
+  TFE_DeleteContextOptions(opts);
+  return ctx;
+}
+
+// TODO: retrieve the device string via TFE_ContextListDevices()
+static const char DEFAULT_CPU_DEVICE[] =
+ "/job:localhost/replica:0/task:0/device:CPU:0";
+
+// Executes a FIFOQueueV2 op on DEFAULT_CPU_DEVICE and returns the resulting
+// queue handle, or nullptr with `status` set if any step fails. The queue is
+// created with capacity 1, a single component of type `inputType`, an empty
+// container name and shared_name "fifo_queue_<tensor_id>".
+static TFE_TensorHandle* createTFEQueue(TFE_Context* ctx, TF_DataType inputType,
+                                        int tensor_id, TF_Status* status) {
+  TFE_Op* op = TFE_NewOp(ctx, "FIFOQueueV2", status);
+  // Bail out before `op` is used, as the enqueue/dequeue helpers in this file
+  // do; the op must not be passed on when its creation failed.
+  if (!status->status.ok()) return nullptr;
+  std::unique_ptr<TFE_Op, decltype(&TFE_DeleteOp)> queueOp(op, TFE_DeleteOp);
+  TFE_OpSetDevice(queueOp.get(), DEFAULT_CPU_DEVICE, status);
+  if (!status->status.ok()) return nullptr;
+  // TODO: use NAMED_TENSOR_QUEUE_CAPACITY in S4TF compiler.
+  TFE_OpSetAttrInt(queueOp.get(), "capacity", 1);
+  TFE_OpSetAttrTypeList(queueOp.get(), "component_types", &inputType, 1);
+  auto shared_name = tensorflow::strings::StrCat("fifo_queue_", tensor_id);
+  TFE_OpSetAttrString(queueOp.get(), "shared_name", shared_name.data(),
+                      shared_name.size());
+  TFE_OpSetAttrString(queueOp.get(), "container", "", 0);
+
+  // TODO: consider making this an unknown shape.
+  const int64_t* dims_ptr = nullptr;
+  int num_dims = 0;
+  // An empty shape list is passed (num_values == 0).
+  TFE_OpSetAttrShapeList(queueOp.get(), "shapes", &dims_ptr, &num_dims,
+                         /*num_values*/ 0, status);
+  if (!status->status.ok()) return nullptr;
+
+  int num_retvals = 1;
+  TFE_TensorHandle* queue = nullptr;
+  TFE_Execute(queueOp.get(), &queue, &num_retvals, status);
+  if (!status->status.ok()) return nullptr;
+  CHECK_EQ(num_retvals, 1);
+
+  return queue;
+}
+
+// Executes a QueueEnqueueV2 op on DEFAULT_CPU_DEVICE with inputs `queue` and
+// `tensor`, declaring a single component of type `inputType`. Returns early
+// with `status` set if any step fails. The op produces no outputs.
+static void createTFEEnqueue(TFE_Context* ctx, TF_DataType inputType,
+ TFE_TensorHandle* queue, TFE_TensorHandle* tensor,
+ TF_Status* status) {
+ TFE_Op* op = TFE_NewOp(ctx, "QueueEnqueueV2", status);
+ if (!status->status.ok()) return;
+ // Deletes `op` on every exit path from here on.
+ std::unique_ptr op_deleter(op, TFE_DeleteOp);
+ TFE_OpSetDevice(op, DEFAULT_CPU_DEVICE, status);
+ if (!status->status.ok()) return;
+ TFE_OpAddInput(op, queue, status);
+ if (!status->status.ok()) return;
+ TFE_OpAddInput(op, tensor, status);
+ if (!status->status.ok()) return;
+ TFE_OpSetAttrTypeList(op, "Tcomponents", &inputType, 1);
+ // NOTE(review): -1 presumably means "no timeout" - confirm against the op.
+ TFE_OpSetAttrInt(op, "timeout_ms", -1);
+
+ int num_retvals = 0;
+ TFE_Execute(op, nullptr /*retvals*/, &num_retvals, status);
+ if (!status->status.ok()) return;
+ CHECK_EQ(num_retvals, 0);
+}
+
+// Executes a QueueDequeueV2 op on DEFAULT_CPU_DEVICE against `queue`,
+// requesting a single component of type `inputType`, and returns the handle
+// it produces. Returns nullptr with `status` set if any step fails.
+static TFE_TensorHandle* createTFEDequeue(TFE_Context* ctx,
+ TF_DataType inputType,
+ TFE_TensorHandle* queue,
+ TF_Status* status) {
+ TFE_Op* op = TFE_NewOp(ctx, "QueueDequeueV2", status);
+ if (!status->status.ok()) return nullptr;
+ // Deletes `op` on every exit path from here on.
+ std::unique_ptr op_deleter(op, TFE_DeleteOp);
+ TFE_OpSetDevice(op, DEFAULT_CPU_DEVICE, status);
+ if (!status->status.ok()) return nullptr;
+
+ TFE_OpAddInput(op, queue, status);
+ if (!status->status.ok()) return nullptr;
+ TFE_OpSetAttrTypeList(op, "component_types", &inputType, 1);
+ // NOTE(review): -1 presumably means "no timeout" - confirm against the op.
+ TFE_OpSetAttrInt(op, "timeout_ms", -1);
+ // Only read after TFE_Execute reports success.
+ TFE_TensorHandle* ret;
+ int num_retvals = 1;
+ TFE_Execute(op, &ret, &num_retvals, status);
+ if (!status->status.ok()) return nullptr;
+ CHECK_EQ(num_retvals, 1);
+ return ret;
+}
+
+// Dequeues one tensor of type `inputType` from the queue identified by
+// `tensor_id`, using a temporary TFE context created from `session`. Returns
+// nullptr with `status` set if context or queue creation fails; otherwise
+// returns the result of createTFEDequeue.
+TFE_TensorHandle* TFE_DequeueNamedTensor(TF_Session* session, int tensor_id,
+ TF_DataType inputType,
+ TF_Status* status) {
+ assert(session);
+ VLOG(1) << "Dequeuing data tensor with id " << tensor_id;
+
+ auto ctx = TFE_CreateContextFromSession(session, status);
+ if (!status->status.ok()) return nullptr;
+ // The temporary context is deleted when this function returns.
+ // NOTE(review): the returned handle outlives this context - confirm that
+ // is safe for callers.
+ std::unique_ptr ctx_deleter(
+ ctx, TFE_DeleteContext);
+
+ TFE_TensorHandle* queue = createTFEQueue(ctx, inputType, tensor_id, status);
+ if (!status->status.ok()) return nullptr;
+ // The queue handle is only needed for the dequeue call below.
+ std::unique_ptr
+ queue_deleter(queue, TFE_DeleteTensorHandle);
+
+ auto* ret = createTFEDequeue(ctx, inputType, queue, status);
+ return ret;
+}
+
+// Same as TFE_DequeueNamedTensor, but runs the queue and dequeue ops in the
+// caller-supplied `ctx` instead of creating a context from a session.
+// Returns nullptr with `status` set if the queue op fails.
+TFE_TensorHandle* TFE_DequeueNamedTensorFromCtx(TFE_Context* ctx, int tensor_id,
+ TF_DataType inputType,
+ TF_Status* status) {
+ TFE_TensorHandle* queue = createTFEQueue(ctx, inputType, tensor_id, status);
+ if (!status->status.ok()) return nullptr;
+ // The queue handle is released when this function returns.
+ std::unique_ptr
+ queue_deleter(queue, TFE_DeleteTensorHandle);
+
+ auto* ret = createTFEDequeue(ctx, inputType, queue, status);
+
+ return ret;
+}
+
+// Enqueues `tensor` onto the queue identified by `tensor_id`, using a
+// temporary TFE context created from `session`. The queue's component type is
+// taken from `tensor`. On failure, returns early with `status` set.
+void TFE_EnqueueNamedTensor(TF_Session* session, int tensor_id,
+ TFE_TensorHandle* tensor, TF_Status* status) {
+ assert(session);
+ VLOG(1) << "Enqueuing data tensor with id " << tensor_id;
+
+ auto ctx = TFE_CreateContextFromSession(session, status);
+ if (!status->status.ok()) return;
+ // The temporary context is deleted when this function returns.
+ std::unique_ptr ctx_deleter(
+ ctx, TFE_DeleteContext);
+
+ TF_DataType inputType = TFE_TensorHandleDataType(tensor);
+ TFE_TensorHandle* queue = createTFEQueue(ctx, inputType, tensor_id, status);
+ if (!status->status.ok()) return;
+ // The queue handle is released when this function returns.
+ std::unique_ptr
+ queue_deleter(queue, TFE_DeleteTensorHandle);
+
+ createTFEEnqueue(ctx, inputType, queue, tensor, status);
+}
+
+// Same as TFE_EnqueueNamedTensor, but runs the queue and enqueue ops in the
+// caller-supplied `ctx` instead of creating a context from a session.
+void TFE_EnqueueNamedTensorFromCtx(TFE_Context* ctx, int tensor_id,
+ TFE_TensorHandle* tensor,
+ TF_Status* status) {
+ VLOG(1) << "Enqueuing data tensor with id " << tensor_id;
+
+ // The queue's component type is taken from the tensor being enqueued.
+ TF_DataType inputType = TFE_TensorHandleDataType(tensor);
+ TFE_TensorHandle* queue = createTFEQueue(ctx, inputType, tensor_id, status);
+ if (!status->status.ok()) return;
+ // The queue handle is released when this function returns.
+ std::unique_ptr
+ queue_deleter(queue, TFE_DeleteTensorHandle);
+
+ createTFEEnqueue(ctx, inputType, queue, tensor, status);
+}
+
+// Enqueues `tensor` onto the queue identified by `tensor_id` with component
+// type TF_VARIANT, using a temporary TFE context created from `session`. On
+// failure, returns early with `status` set.
+void TFE_EnqueueVariantTensor(TF_Session* session, int tensor_id,
+ TFE_TensorHandle* tensor, TF_Status* status) {
+ VLOG(1) << "Enqueuing variant tensor with id " << tensor_id;
+
+ auto ctx = TFE_CreateContextFromSession(session, status);
+ if (!status->status.ok()) return;
+ // The temporary context is deleted when this function returns.
+ std::unique_ptr ctx_deleter(
+ ctx, TFE_DeleteContext);
+
+ TFE_TensorHandle* queue = createTFEQueue(ctx, TF_VARIANT, tensor_id, status);
+ if (!status->status.ok()) return;
+ // The queue handle is released when this function returns.
+ std::unique_ptr
+ queue_deleter(queue, TFE_DeleteTensorHandle);
+
+ createTFEEnqueue(ctx, TF_VARIANT, queue, tensor, status);
+}
+
+// Dequeues one TF_VARIANT tensor from the queue identified by `tensor_id`,
+// using a temporary TFE context created from `session`. Returns nullptr with
+// `status` set if context or queue creation fails; otherwise returns the
+// result of createTFEDequeue.
+TFE_TensorHandle* TFE_DequeueVariantTensor(TF_Session* session, int tensor_id,
+ TF_Status* status) {
+ VLOG(1) << "Dequeuing variant tensor with id " << tensor_id;
+
+ auto ctx = TFE_CreateContextFromSession(session, status);
+ if (!status->status.ok()) return nullptr;
+ // The temporary context is deleted when this function returns.
+ // NOTE(review): the returned handle outlives this context - confirm that
+ // is safe for callers.
+ std::unique_ptr ctx_deleter(
+ ctx, TFE_DeleteContext);
+
+ TFE_TensorHandle* queue = createTFEQueue(ctx, TF_VARIANT, tensor_id, status);
+ if (!status->status.ok()) return nullptr;
+ // The queue handle is released when this function returns.
+ std::unique_ptr
+ queue_deleter(queue, TFE_DeleteTensorHandle);
+
+ return createTFEDequeue(ctx, TF_VARIANT, queue, status);
+}
+
+// CHECK-fails, logging TF_Message(status), unless `status` has code TF_OK.
+static void CheckOk(TF_Status* status) {
+ CHECK_EQ(TF_GetCode(status), TF_OK) << TF_Message(status);
+}
+
+// Resolves `handle` to a TF_Tensor, converts it to a tensorflow::Tensor and
+// logs its DebugString() at INFO. A failure to resolve or convert is fatal
+// (CHECK) rather than reported to the caller.
+void TFE_TensorHandlePrintDebugString(TFE_TensorHandle* handle) {
+ auto* status = TF_NewStatus();
+ TF_Tensor* t = TFE_TensorHandleResolve(handle, status);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+
+ tensorflow::Tensor dst;
+ TF_CHECK_OK(TF_TensorToTensor(t, &dst));
+ LOG(INFO) << dst.DebugString();
+
+ // Release the temporaries created above.
+ TF_DeleteTensor(t);
+ TF_DeleteStatus(status);
+}
+
+// Executes a Const op of dtype TF_FLOAT in `ctx` whose value is a 0-dimension
+// tensor holding 17.0, and returns the resulting handle. Any failure is fatal
+// (CheckOk / CHECK) rather than reported to the caller.
+TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx) {
+ // NOTE(review): this comment previously said messages are logged at INFO,
+ // but the calls below use VLOG(1) - confirm which is intended.
+ VLOG(1) << "TFE_RunConstOp called";
+
+ auto* status = TF_NewStatus();
+ auto* op = TFE_NewOp(ctx, "Const", status);
+ CheckOk(status);
+ TFE_OpSetAttrType(op, "dtype", TF_FLOAT);
+
+ // Allocate storage for exactly one float and write the constant into it.
+ auto* tensor =
+ TF_AllocateTensor(TF_FLOAT, /*shape.data()*/ nullptr, /*shape.size()*/ 0,
+ TF_DataTypeSize(TF_FLOAT) * 1);
+ auto* ptr = reinterpret_cast(TF_TensorData(tensor));
+ *reinterpret_cast(ptr) = 17.0;
+
+ TFE_OpSetAttrTensor(op, "value", tensor, status);
+ CheckOk(status);
+ // The tensor is deleted once it has been set as the op's "value" attr.
+ TF_DeleteTensor(tensor);
+ VLOG(1) << "New op created";
+
+ TFE_TensorHandle* retval;
+ int num_retvals = 1;
+ TFE_Execute(op, &retval, &num_retvals, status);
+ CheckOk(status);
+ CHECK_EQ(num_retvals, 1);
+ VLOG(1) << "Op executed";
+
+ TFE_DeleteOp(op);
+ TF_DeleteStatus(status);
+
+ return retval;
+}
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index 6617c5a572e90e78369f73d714f39942f213040f..950ad9aeed6f883fa22c2673fa8aa92839cd0fbc 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -20,6 +20,7 @@ limitations under the License.
#include
#include "tensorflow/c/c_api.h"
+#include "tensorflow/c/eager/c_api.h"
// --------------------------------------------------------------------------
// Experimental C API for TensorFlow.
@@ -130,6 +131,59 @@ TF_CAPI_EXPORT extern void TF_EnqueueNamedTensor(TF_Session* session,
int tensor_id,
TF_Tensor* tensor,
TF_Status* status);
+// Serializes the ServerDef given as `text_proto`; returns nullptr on error.
+TF_CAPI_EXPORT extern TF_Buffer* TFE_GetServerDef(const char* text_proto, TF_Status* status);
+
+// TODO: remove this API in favor of the next one.
+TF_CAPI_EXPORT extern TFE_Context* TFE_NewContextFromSession(
+ const TFE_ContextOptions* opts, TF_Session* sess, TF_Status* status);
+
+// Creates from `session` a new eager context to run a graph function or
+// sends/recvs, so that these concurrent TFE executions can share (via
+// `session` and its associated device mgr) the same set of fifo queue resource
+// ops, used for host<->TF tensor transfers. This way the sends/recvs calls and
+// graph function execution can access the same fifo queue resource handles
+// (associated with devices managed by the device manager, which can be obtained
+// from `session`).
+//
+// TODO: Remove this function once we migrate away from using session.
+TF_CAPI_EXPORT extern TFE_Context* TFE_CreateContextFromSession(
+ TF_Session* session, TF_Status* status);
+
+// TODO: Retire this API in favor of the next one.
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_DequeueNamedTensor(
+ TF_Session* session, int tensor_id, TF_DataType inputType,
+ TF_Status* status);
+
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_DequeueNamedTensorFromCtx(
+ TFE_Context* ctx, int tensor_id, TF_DataType inputType, TF_Status* status);
+
+TF_CAPI_EXPORT extern void TFE_EnqueueNamedTensor(TF_Session* session,
+ int tensor_id,
+ TFE_TensorHandle* tensor,
+ TF_Status* status);
+
+TF_CAPI_EXPORT extern void TFE_EnqueueNamedTensorFromCtx(
+ TFE_Context* ctx, int tensor_id, TFE_TensorHandle* tensor,
+ TF_Status* status);
+
+// TODO: consider folding the 2 APIs below into the ones above.
+TF_CAPI_EXPORT extern void TFE_EnqueueVariantTensor(TF_Session* session,
+ int tensor_id,
+ TFE_TensorHandle* tensor,
+ TF_Status* status);
+
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_DequeueVariantTensor(
+ TF_Session* session, int tensor_id, TF_Status* status);
+
+// Logs `handle` in a human readable format at INFO level, for debugging.
+TF_CAPI_EXPORT extern void TFE_TensorHandlePrintDebugString(
+ TFE_TensorHandle* handle);
+
+// Returns a handle to a const scalar float tensor (value 17.0).
+// The caller keeps ownership of `ctx` and owns the returned tensor handle.
+// TODO: Remove this API with hard-coded tensor computation.
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx);
#ifdef __cplusplus
} /* end extern "C" */
diff --git a/tensorflow/c/c_api_experimental_test.cc b/tensorflow/c/c_api_experimental_test.cc
index 30fcfd401d9d634962d64aaa3bf348de91f2ecae..c6effd39697e0397278770b53e98508074f99862 100644
--- a/tensorflow/c/c_api_experimental_test.cc
+++ b/tensorflow/c/c_api_experimental_test.cc
@@ -16,8 +16,10 @@ limitations under the License.
#include "tensorflow/c/c_api_experimental.h"
#include "tensorflow/c/c_test_util.h"
#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/tensorflow_server.pb.h"
namespace tensorflow {
namespace {
@@ -116,5 +118,49 @@ TEST(CAPI_EXPERIMENTAL, ImagenetIteratorGetNext) {
TF_DeleteStatus(s);
}
+TEST(CAPI_EXPERIMENTAL, GetServerDefTest) {
+ const string expected_text_proto(R"(cluster {
+ job {
+ name: "worker"
+ tasks {
+ key: 0
+ value: "tpuserver:0"
+ }
+ tasks {
+ key: 1
+ value: "localhost:1"
+ }
+ }
+}
+job_name: "worker"
+task_index: 1
+protocol: "grpc"
+)");
+ // A well-formed text proto must round-trip through the serialized ServerDef.
+ TF_Status* status = TF_NewStatus();
+ TF_Buffer* result = TFE_GetServerDef(expected_text_proto.c_str(), status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ // ASSERT rather than EXPECT above: `result` is dereferenced right below.
+ ServerDef actual;
+ ASSERT_TRUE(actual.ParseFromArray(result->data, result->length));
+ string actual_text_proto;
+ tensorflow::protobuf::TextFormat::PrintToString(actual, &actual_text_proto);
+ EXPECT_EQ(expected_text_proto, actual_text_proto);
+ // A truncated text proto must fail with a descriptive error and no buffer.
+ const string malformed_text_proto(R"(cluster {
+ job {
+ name: "worker")");
+ TF_Buffer* null_result =
+ TFE_GetServerDef(malformed_text_proto.c_str(), status);
+ EXPECT_NE(TF_GetCode(status), TF_OK);
+ EXPECT_TRUE(tensorflow::str_util::StrContains(
+ TF_Message(status), "Invalid text proto for ServerDef"));
+ EXPECT_EQ(null_result, nullptr);
+
+ // Cleanup
+ TF_DeleteBuffer(result);
+ TF_DeleteStatus(status);
+}
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index a2c5a42c11361779de61b515e0f08dcc45e609b9..f68f8a3e90a971b5e4a024feaf26ba498afc48da 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/core/framework/function.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/tensor.pb.h" // NOLINT
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/lib/strings/base64.h"
diff --git a/tensorflow/c/c_api_function_test.cc b/tensorflow/c/c_api_function_test.cc
index bb9433ce25e0e3b9cfb54698c940cc1b38c88d31..73fe73769bc1219ce865149d67d333c53371ccc5 100644
--- a/tensorflow/c/c_api_function_test.cc
+++ b/tensorflow/c/c_api_function_test.cc
@@ -1619,5 +1619,66 @@ TEST_F(CApiFunctionTest, GetFunctionsFromGraph) {
TF_DeleteFunction(func1);
}
+// This test only works when the TF build includes XLA compiler. One way to set
+// this up is via bazel build option "--define with_xla_support=true".
+//
+// FIXME: generalize the macro name TENSORFLOW_EAGER_USE_XLA to
+// something like TENSORFLOW_CAPI_USE_XLA.
+#ifdef TENSORFLOW_EAGER_USE_XLA
+TEST_F(CApiFunctionTest, StatelessIf_XLA) {
+ TF_Function* func;
+ const std::string funcName = "BranchFunc";
+ DefineFunction(funcName.c_str(), &func);
+ TF_GraphCopyFunction(host_graph_, func, nullptr, s_);
+ ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+ TF_Operation* feed = Placeholder(host_graph_, s_);
+ ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+ TF_Operation* true_cond = ScalarConst(true, host_graph_, s_);
+ ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+ // Build a StatelessIf whose then/else branches are both `funcName`.
+ TF_OperationDescription* desc =
+ TF_NewOperation(host_graph_, "StatelessIf", "IfNode");
+ TF_AddInput(desc, {true_cond, 0});
+ TF_Output inputs[] = {{feed, 0}};
+ TF_AddInputList(desc, inputs, TF_ARRAYSIZE(inputs));
+ ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+ TF_SetAttrType(desc, "Tcond", TF_BOOL);
+ TF_DataType inputType = TF_INT32;
+ TF_SetAttrTypeList(desc, "Tin", &inputType, 1);
+ TF_SetAttrTypeList(desc, "Tout", &inputType, 1);
+ TF_SetAttrFuncName(desc, "then_branch", funcName.data(), funcName.size());
+ TF_SetAttrFuncName(desc, "else_branch", funcName.data(), funcName.size());
+ TF_SetDevice(desc, "/device:XLA_CPU:0");
+ auto op = TF_FinishOperation(desc, s_);
+ ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+ ASSERT_NE(op, nullptr);
+
+ // Create a session for this graph.
+ CSession csession(host_graph_, s_, /*use_XLA*/ true);
+ ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+ // Run the graph.
+ csession.SetInputs({{feed, Int32Tensor(17)}});
+ csession.SetOutputs({op});
+ csession.Run(s_);
+ ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+ TF_Tensor* out = csession.output_tensor(0);
+ ASSERT_TRUE(out != nullptr);
+ EXPECT_EQ(TF_INT32, TF_TensorType(out));
+ EXPECT_EQ(0, TF_NumDims(out)); // scalar
+ ASSERT_EQ(sizeof(int32), TF_TensorByteSize(out));
+ int32* output_contents = static_cast<int32*>(TF_TensorData(out));
+ EXPECT_EQ(-17, *output_contents);  // presumably BranchFunc negates; confirm
+
+ // Clean up
+ csession.CloseAndDelete(s_);
+ ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+ TF_DeleteFunction(func);
+}
+#endif // TENSORFLOW_EAGER_USE_XLA
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index aa2a537f03be31ae45ff3d6f7815b449d661cf9c..03516c39dc970aa23967107d3a0446da94669465 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -259,8 +259,8 @@ TEST(CAPI, DeprecatedSession) {
TF_Run(session, run_options, nullptr, nullptr, 0, nullptr, nullptr, 0,
nullptr, 0, run_metadata, s);
EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s)) << TF_Message(s);
- EXPECT_EQ(std::string("Session was not created with a graph before Run()!"),
- std::string(TF_Message(s)));
+ EXPECT_EQ("Session was not created with a graph before Run()!",
+ string(TF_Message(s)));
TF_DeleteBuffer(run_metadata);
TF_DeleteBuffer(run_options);
@@ -1224,8 +1224,8 @@ class CApiColocationTest : public ::testing::Test {
TF_OperationGetAttrMetadata(op, tensorflow::kColocationAttrName, s_);
if (expected.empty()) {
ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_);
- EXPECT_EQ(std::string("Operation 'add' has no attr named '_class'."),
- std::string(TF_Message(s_)));
+ EXPECT_EQ("Operation 'add' has no attr named '_class'.",
+ string(TF_Message(s_)));
return;
}
EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
@@ -1369,16 +1369,16 @@ TEST(CAPI, SavedModel) {
input.flat()(i) = example.SerializeAsString();
}
- const tensorflow::string input_op_name =
- std::string(tensorflow::ParseTensorName(input_name).first);
+ const tensorflow::string input_op_name(
+ tensorflow::ParseTensorName(input_name).first);
TF_Operation* input_op =
TF_GraphOperationByName(graph, input_op_name.c_str());
ASSERT_TRUE(input_op != nullptr);
csession.SetInputs({{input_op, TF_TensorFromTensor(input, s)}});
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
- const tensorflow::string output_op_name =
- std::string(tensorflow::ParseTensorName(output_name).first);
+ const tensorflow::string output_op_name(
+ tensorflow::ParseTensorName(output_name).first);
TF_Operation* output_op =
TF_GraphOperationByName(graph, output_op_name.c_str());
ASSERT_TRUE(output_op != nullptr);
diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc
index 24eb6c069b21349fce288db3e79fbf14e824ad11..f15d9ee20adb31a0b76e2cd0d1e67f17a9deff05 100644
--- a/tensorflow/c/c_test_util.cc
+++ b/tensorflow/c/c_test_util.cc
@@ -26,6 +26,10 @@ limitations under the License.
using tensorflow::GraphDef;
using tensorflow::NodeDef;
+static void BoolDeallocator(void* data, size_t, void* arg) {
+ delete[] static_cast<bool*>(data);  // Pairs with `new bool[1]` in BoolTensor.
+}
+
static void Int32Deallocator(void* data, size_t, void* arg) {
delete[] static_cast(data);
}
@@ -38,6 +42,14 @@ static void FloatDeallocator(void* data, size_t, void* arg) {
delete[] static_cast(data);
}
+TF_Tensor* BoolTensor(bool v) {
+ // One heap-allocated bool, handed to TF_NewTensor along with BoolDeallocator.
+ bool* storage = new bool[1];
+ storage[0] = v;
+ return TF_NewTensor(TF_BOOL, /*dims=*/nullptr, /*num_dims=*/0, storage,
+ sizeof(bool), &BoolDeallocator, /*deallocator_arg=*/nullptr);
+}
+
TF_Tensor* Int8Tensor(const int64_t* dims, int num_dims, const char* values) {
int64_t num_values = 1;
for (int i = 0; i < num_dims; ++i) {
@@ -131,6 +143,12 @@ TF_Operation* Const(TF_Tensor* t, TF_Graph* graph, TF_Status* s,
return op;
}
+TF_Operation* ScalarConst(bool v, TF_Graph* graph, TF_Status* s,
+ const char* name) {
+ const unique_tensor_ptr scalar(BoolTensor(v), TF_DeleteTensor);  // freed on return
+ return Const(scalar.get(), graph, s, name);
+}
+
TF_Operation* ScalarConst(int32_t v, TF_Graph* graph, TF_Status* s,
const char* name) {
unique_tensor_ptr tensor(Int32Tensor(v), TF_DeleteTensor);
diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h
index 38313d647ca93d4779bb1325f8ed7bde4b743879..7eeb1ee5e17ad7e5644f8bc8a18ca967b108475d 100644
--- a/tensorflow/c/c_test_util.h
+++ b/tensorflow/c/c_test_util.h
@@ -31,6 +31,8 @@ using ::tensorflow::string;
typedef std::unique_ptr
unique_tensor_ptr;
+TF_Tensor* BoolTensor(bool v);  // Scalar TF_BOOL tensor holding `v`.
+
// Create a tensor with values of type TF_INT8 provided by `values`.
TF_Tensor* Int8Tensor(const int64_t* dims, int num_dims, const char* values);
@@ -55,6 +57,9 @@ TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s,
TF_Operation* Const(TF_Tensor* t, TF_Graph* graph, TF_Status* s,
const char* name = "const");
+TF_Operation* ScalarConst(bool v, TF_Graph* graph, TF_Status* s,
+ const char* name = "scalar");
+
TF_Operation* ScalarConst(int32_t v, TF_Graph* graph, TF_Status* s,
const char* name = "scalar");
diff --git a/tensorflow/c/checkpoint_reader.cc b/tensorflow/c/checkpoint_reader.cc
index 74bc25a491ac01cb725d1c004197e48727c30230..d3311f0cd06f2b151c3567735eb41b5baf72e102 100644
--- a/tensorflow/c/checkpoint_reader.cc
+++ b/tensorflow/c/checkpoint_reader.cc
@@ -125,7 +125,7 @@ CheckpointReader::BuildV2VarMaps() {
const auto& slice_proto = entry.slices(i);
CHECK(filtered_keys
.insert(EncodeTensorNameSlice(
- std::string(v2_reader_->key()) /* full var's name */,
+ string(v2_reader_->key()) /* full var's name */,
TensorSlice(slice_proto)))
.second);
}
@@ -138,11 +138,11 @@ CheckpointReader::BuildV2VarMaps() {
new TensorSliceReader::VarToDataTypeMap);
v2_reader_->Seek(kHeaderEntryKey);
for (v2_reader_->Next(); v2_reader_->Valid(); v2_reader_->Next()) {
- if (filtered_keys.count(std::string(v2_reader_->key())) > 0) continue;
+ if (filtered_keys.count(string(v2_reader_->key())) > 0) continue;
CHECK(entry.ParseFromArray(v2_reader_->value().data(),
v2_reader_->value().size()))
<< entry.InitializationErrorString();
- string key = std::string(v2_reader_->key());
+ string key(v2_reader_->key());
(*var_to_shape_map)[key] = TensorShape(entry.shape());
(*var_to_data_type_map)[key] = DataType(entry.dtype());
}
diff --git a/tensorflow/c/checkpoint_reader.h b/tensorflow/c/checkpoint_reader.h
index 4de1300a7f66a8b4eb8074819432fd7dd597bb15..91654c8d4fb8067ae1fb525ebaa6c54689085545 100644
--- a/tensorflow/c/checkpoint_reader.h
+++ b/tensorflow/c/checkpoint_reader.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#ifndef TENSORFLOW_C_CHECKPOINT_READER_H
-#define TENSORFLOW_C_CHECKPOINT_READER_H
+#ifndef TENSORFLOW_C_CHECKPOINT_READER_H_
+#define TENSORFLOW_C_CHECKPOINT_READER_H_
#include
#include
@@ -79,4 +79,4 @@ class CheckpointReader {
} // namespace checkpoint
} // namespace tensorflow
-#endif // TENSORFLOW_C_CHECKPOINT_READER_H
+#endif // TENSORFLOW_C_CHECKPOINT_READER_H_
diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index 37be52f57d865c1e59611540d5dab04b59e89444..3ee31a6a7ac641bbd3fc4c05568b61e433a1d523 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -68,7 +68,10 @@ tf_cuda_library(
tf_cuda_library(
name = "c_api_internal",
hdrs = ["c_api_internal.h"],
- visibility = ["//tensorflow:internal"],
+ visibility = [
+ "//learning/deepmind/courier:__pkg__",
+ "//tensorflow:internal",
+ ],
deps = [
":c_api",
"//tensorflow/c:c_api",
diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
old mode 100644
new mode 100755
index a0a44440c891c4b9bd6d43299e0ececa25a6b709..0bf3d9542b72ecff916986ab809e8793b796d14c
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -110,7 +110,7 @@ tensorflow::Status GetAllRemoteDevices(
tensorflow::Status CreateRemoteContexts(
const std::vector& remote_workers, int64 rendezvous_id,
- const tensorflow::ServerDef& server_def,
+ int keep_alive_secs, const tensorflow::ServerDef& server_def,
tensorflow::eager::EagerClientCache* remote_eager_workers, bool async,
tensorflow::gtl::FlatMap* remote_contexts) {
for (int i = 0; i < remote_workers.size(); i++) {
@@ -129,6 +129,7 @@ tensorflow::Status CreateRemoteContexts(
request.mutable_server_def()->set_job_name(parsed_name.job);
request.mutable_server_def()->set_task_index(parsed_name.task);
request.set_async(async);
+ request.set_keep_alive_secs(keep_alive_secs);
auto* eager_client = remote_eager_workers->GetClient(remote_worker);
if (eager_client == nullptr) {
return tensorflow::errors::Internal(
@@ -151,7 +152,8 @@ tensorflow::Status CreateRemoteContexts(
}
tensorflow::Status UpdateTFE_ContextWithServerDef(
- const tensorflow::ServerDef& server_def, TFE_Context* ctx) {
+ int keep_alive_secs, const tensorflow::ServerDef& server_def,
+ TFE_Context* ctx) {
// We don't use the TF_RETURN_IF_ERROR macro directly since that destroys the
// server object (which currently CHECK-fails) and we miss the error, instead,
// we log the error, and then return to allow the user to see the error
@@ -202,8 +204,8 @@ tensorflow::Status UpdateTFE_ContextWithServerDef(
// Initialize remote eager workers.
tensorflow::gtl::FlatMap remote_contexts;
LOG_AND_RETURN_IF_ERROR(CreateRemoteContexts(
- remote_workers, rendezvous_id, server_def, remote_eager_workers.get(),
- ctx->context.Async(), &remote_contexts));
+ remote_workers, rendezvous_id, keep_alive_secs, server_def,
+ remote_eager_workers.get(), ctx->context.Async(), &remote_contexts));
tensorflow::RemoteRendezvous* r =
grpc_server->worker_env()->rendezvous_mgr->Find(rendezvous_id);
@@ -222,9 +224,10 @@ tensorflow::Status UpdateTFE_ContextWithServerDef(
auto* device_mgr = grpc_server->worker_env()->device_mgr;
- ctx->context.InitializeRemote(
- std::move(server), std::move(remote_eager_workers),
- std::move(remote_device_mgr), remote_contexts, r, device_mgr);
+ ctx->context.InitializeRemote(std::move(server),
+ std::move(remote_eager_workers),
+ std::move(remote_device_mgr), remote_contexts,
+ r, device_mgr, keep_alive_secs);
return tensorflow::Status::OK();
#undef LOG_AND_RETURN_IF_ERROR
@@ -241,8 +244,8 @@ void TFE_ContextOptionsSetConfig(TFE_ContextOptions* options, const void* proto,
}
void TFE_ContextOptionsSetAsync(TFE_ContextOptions* options,
- unsigned char async) {
- options->async = async;
+ unsigned char enable) {
+ options->async = enable;
}
void TFE_ContextOptionsSetDevicePlacementPolicy(
TFE_ContextOptions* options, TFE_ContextDevicePlacementPolicy policy) {
@@ -250,9 +253,9 @@ void TFE_ContextOptionsSetDevicePlacementPolicy(
}
TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx,
- unsigned char async,
+ unsigned char enable,
TF_Status* status) {
- status->status = ctx->context.SetAsyncForThread(async);
+ status->status = ctx->context.SetAsyncForThread(enable);
}
void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; }
@@ -270,7 +273,20 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) {
new tensorflow::IntraProcessRendezvous(device_mgr.get());
return new TFE_Context(opts->session_options.options, opts->policy,
- opts->async, std::move(device_mgr), r);
+ opts->async, device_mgr.release(),
+ /*device_mgr_owned*/ true, r);
+}
+
+TFE_Context* TFE_NewContextFromSession(const TFE_ContextOptions* opts,
+ TF_Session* sess, TF_Status* status) {
+ const tensorflow::DeviceMgr* device_mgr = nullptr;  // Obtained from `sess`.
+ status->status = sess->session->LocalDeviceManager(&device_mgr);
+ if (!status->status.ok()) return nullptr;  // Error is left in `status`.
+ tensorflow::Rendezvous* r =
+ new tensorflow::IntraProcessRendezvous(device_mgr);
+ return new TFE_Context(opts->session_options.options, opts->policy,
+ opts->async, device_mgr, /*device_mgr_owned*/ false,
+ r);  // device_mgr_owned is false: the context does not take it over.
}
void TFE_DeleteContext(TFE_Context* ctx) { delete ctx; }
@@ -288,6 +304,7 @@ void TFE_ContextClearCaches(TFE_Context* ctx) { ctx->context.ClearCaches(); }
// Set server_def on the context, possibly updating it.
TF_CAPI_EXPORT extern void TFE_ContextSetServerDef(TFE_Context* ctx,
+ int keep_alive_secs,
const void* proto,
size_t proto_len,
TF_Status* status) {
@@ -297,7 +314,8 @@ TF_CAPI_EXPORT extern void TFE_ContextSetServerDef(TFE_Context* ctx,
"Invalid tensorflow.ServerDef protocol buffer");
return;
}
- status->status = UpdateTFE_ContextWithServerDef(server_def, ctx);
+ status->status =
+ UpdateTFE_ContextWithServerDef(keep_alive_secs, server_def, ctx);
}
void TFE_ContextSetThreadLocalDevicePlacementPolicy(
@@ -357,6 +375,17 @@ int TFE_TensorHandleNumDims(TFE_TensorHandle* h, TF_Status* status) {
return result;
}
+int64_t TFE_TensorHandleNumElements(TFE_TensorHandle* h, TF_Status* status) {
+ if (h == nullptr || h->handle == nullptr) {
+ status->status = tensorflow::errors::InvalidArgument(
+ "The passed in handle is a nullptr");
+ return -1;
+ }
+ tensorflow::int64 result = -1;  // Defined sentinel if NumElements() fails.
+ status->status = h->handle->NumElements(&result);
+ return result;  // Callers must consult `status` before trusting the count.
+}
+
int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index,
TF_Status* status) {
if (h == nullptr || h->handle == nullptr) {
@@ -381,6 +410,19 @@ const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h, TF_Status* status) {
: d->name().c_str();
}
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_TensorHandleCopySharingTensor(
+ TFE_TensorHandle* h, TF_Status* status) {
+ if (h == nullptr || h->handle == nullptr) {
+ status->status = tensorflow::errors::InvalidArgument(
+ "The passed in handle is a nullptr");
+ return nullptr;
+ }
+
+ h->handle->Ref();  // Extra reference taken for the wrapper created below.
+
+ return new TFE_TensorHandle(h->handle);  // Wraps the same handle as `h`.
+}
+
TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) {
if (h == nullptr || h->handle == nullptr) {
status->status = tensorflow::errors::InvalidArgument(
@@ -536,6 +578,13 @@ void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name,
op->operation.MutableAttrs()->Set(attr_name, attr_value);
}
+void TFE_OpSetAttrTensor(TFE_Op* op, const char* attr_name, TF_Tensor* tensor,
+ TF_Status* status) {
+ tensorflow::Tensor t;  // The attr is set only if `tensor` converts cleanly.
+ status->status = TF_TensorToTensor(tensor, &t);
+ if (status->status.ok()) op->operation.MutableAttrs()->Set(attr_name, t);
+}
+
void TFE_OpSetAttrStringList(TFE_Op* op, const char* attr_name,
const void* const* values, const size_t* lengths,
int num_values) {
@@ -719,6 +768,10 @@ TFE_Op* GetFunc(TFE_Context* ctx, const tensorflow::NameAttrList& func,
}
} // namespace
+void TFE_ContextStartStep(TFE_Context* ctx) { ctx->context.StartStep(); }  // Forwards to the wrapped context.
+
+void TFE_ContextEndStep(TFE_Context* ctx) { ctx->context.EndStep(); }  // Forwards to the wrapped context.
+
namespace tensorflow {
void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op,
const tensorflow::AttrValue& default_value,
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
old mode 100644
new mode 100755
index 25cf7adbc737411e93afe13a69850435994a1cd2..6323f8a053197bb7069acf2d43214fb78c36f436
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -76,7 +76,7 @@ typedef enum TFE_ContextDevicePlacementPolicy {
// Sets the default execution mode (sync/async). Note that this can be
// overridden per thread using TFE_ContextSetAsyncForThread.
TF_CAPI_EXPORT extern void TFE_ContextOptionsSetAsync(TFE_ContextOptions*,
- unsigned char async);
+ unsigned char enable);
TF_CAPI_EXPORT extern void TFE_ContextOptionsSetDevicePlacementPolicy(
TFE_ContextOptions*, TFE_ContextDevicePlacementPolicy);
@@ -114,7 +114,7 @@ TFE_ContextGetDevicePlacementPolicy(TFE_Context*);
// Overrides the execution mode (sync/async) for the current thread.
TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context*,
- unsigned char async,
+ unsigned char enable,
TF_Status* status);
// A tensorflow.ServerDef specifies remote workers (in addition to the current
@@ -124,6 +124,7 @@ TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context*,
// If the following is set, all servers identified by the
// ServerDef must be up when the context is created.
TF_CAPI_EXPORT extern void TFE_ContextSetServerDef(TFE_Context* ctx,
+ int keep_alive_secs,
const void* proto,
size_t proto_len,
TF_Status* status);
@@ -162,6 +163,8 @@ TF_CAPI_EXPORT extern TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h);
// This function will block till the operation that produces `h` has completed.
TF_CAPI_EXPORT extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h,
TF_Status* status);
+TF_CAPI_EXPORT extern int64_t TFE_TensorHandleNumElements(TFE_TensorHandle* h,
+ TF_Status* status);
// This function will block till the operation that produces `h` has completed.
TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h,
int dim_index,
@@ -170,6 +173,12 @@ TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h,
TF_CAPI_EXPORT extern const char* TFE_TensorHandleDeviceName(
TFE_TensorHandle* h, TF_Status* status);
+// Return a pointer to a new TFE_TensorHandle that shares the underlying tensor
+// with `h`. On success, `status` is set to OK. On failure, `status` reflects
+// the error and a nullptr is returned.
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_TensorHandleCopySharingTensor(
+ TFE_TensorHandle* h, TF_Status* status);
+
// This function will block till the operation that produces `h` has
// completed. The memory returned might alias the internal memory used by
// TensorFlow. Hence, callers should not mutate this memory (for example by
@@ -304,6 +313,11 @@ TF_CAPI_EXPORT extern void TFE_OpSetAttrFunction(TFE_Op* op,
const char* attr_name,
const TFE_Op* value);
+TF_CAPI_EXPORT extern void TFE_OpSetAttrTensor(TFE_Op* op,
+ const char* attr_name,
+ TF_Tensor* tensor,
+ TF_Status* status);
+
TF_CAPI_EXPORT extern void TFE_OpSetAttrStringList(TFE_Op* op,
const char* attr_name,
const void* const* values,
@@ -380,6 +394,16 @@ TF_CAPI_EXPORT extern void TFE_ContextExportRunMetadata(TFE_Context* ctx,
TF_Buffer* buf,
TF_Status* status);
+// Some TF ops need a step container to be set to limit the lifetime of some
+// resources (mostly TensorArray and Stack, used in while loop gradients in
+// graph mode). Calling this on a context tells it to start a step.
+TF_CAPI_EXPORT extern void TFE_ContextStartStep(TFE_Context* ctx);
+
+// Ends a step. When there is no active step (that is, every started step has
+// been ended) step containers will be cleared. Note: it is not safe to call
+// TFE_ContextEndStep while ops which rely on the step container may be running.
+TF_CAPI_EXPORT extern void TFE_ContextEndStep(TFE_Context* ctx);
+
#ifdef __cplusplus
} /* end extern "C" */
#endif
diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h
index a5c0681e2e4eddae08954d9d0178ca96a3f8f29a..104d52430cf7aa14d4d2a335a1b96e667f21ce87 100644
--- a/tensorflow/c/eager/c_api_internal.h
+++ b/tensorflow/c/eager/c_api_internal.h
@@ -62,15 +62,14 @@ struct TFE_ContextOptions {
};
struct TFE_Context {
- explicit TFE_Context(const tensorflow::SessionOptions& opts,
- TFE_ContextDevicePlacementPolicy default_policy,
- bool async,
- std::unique_ptr device_mgr,
- tensorflow::Rendezvous* rendezvous)
+ TFE_Context(const tensorflow::SessionOptions& opts,
+ TFE_ContextDevicePlacementPolicy default_policy, bool async,
+ const tensorflow::DeviceMgr* device_mgr, bool device_mgr_owned,
+ tensorflow::Rendezvous* rendezvous)
: context(opts,
static_cast(
default_policy),
- async, std::move(device_mgr), rendezvous) {}
+ async, device_mgr, device_mgr_owned, rendezvous) {}
tensorflow::EagerContext context;
};
diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc
index 00a0a71fca5537bb65c76cb39c080c59160c5960..55331022b9dbd0696928fa44430f340f371432ac 100644
--- a/tensorflow/c/eager/c_api_test.cc
+++ b/tensorflow/c/eager/c_api_test.cc
@@ -151,7 +151,7 @@ void TestRemoteExecute(bool async) {
EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
TFE_DeleteContextOptions(opts);
- TFE_ContextSetServerDef(ctx, serialized.data(), serialized.size(), status);
+ TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status);
EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
TFE_TensorHandle* h0_task0 = TestMatrixTensorHandle();
@@ -239,7 +239,7 @@ void TestRemoteExecuteSilentCopies(bool async) {
EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
TFE_DeleteContextOptions(opts);
- TFE_ContextSetServerDef(ctx, serialized.data(), serialized.size(), status);
+ TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status);
EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
TFE_TensorHandle* h0_task0 = TestMatrixTensorHandle();
@@ -371,7 +371,7 @@ void TestRemoteExecuteChangeServerDef(bool async) {
EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
TFE_DeleteContextOptions(opts);
- TFE_ContextSetServerDef(ctx, serialized.data(), serialized.size(), status);
+ TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status);
EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
const char remote_device_name[] =
@@ -397,7 +397,7 @@ void TestRemoteExecuteChangeServerDef(bool async) {
ASSERT_TRUE(s.ok()) << s.error_message();
ASSERT_TRUE(worker_server->Start().ok());
- TFE_ContextSetServerDef(ctx, serialized.data(), serialized.size(), status);
+ TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status);
EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
// Create a new tensor_handle.
@@ -1471,4 +1471,86 @@ void BM_ReadVariable(int iters) {
}
BENCHMARK(BM_ReadVariable);
+TEST(CAPI, StringAttributes) {
+ // Test that TFE_OpSetAttrString doesn't hold on to the value after it
+ // returns.
+ TF_Status* status = TF_NewStatus();
+ TFE_ContextOptions* opts = TFE_NewContextOptions();
+ TFE_Context* ctx = TFE_NewContext(opts, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteContextOptions(opts);
+
+ std::vector<int64_t> dims(4, 1);  // Shape [1, 1, 1, 1].
+ TFE_Op* op = TFE_NewOp(ctx, "AvgPool", status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+
+ TF_Tensor* tensor =
+ TF_AllocateTensor(TF_FLOAT, dims.data(), dims.size(), sizeof(float));
+ float tensor_data[] = {1};
+ memcpy(TF_TensorData(tensor), tensor_data, TF_TensorByteSize(tensor));
+ TFE_TensorHandle* tensor_handle = TFE_NewTensorHandle(tensor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_OpAddInput(op, tensor_handle, status);
+ TF_DeleteTensor(tensor);
+ TFE_DeleteTensorHandle(tensor_handle);
+
+ std::vector<int64_t> values(4, 1);  // Shared by "ksize" and "strides".
+ TFE_OpSetAttrIntList(op, "ksize", values.data(), values.size());
+ TFE_OpSetAttrIntList(op, "strides", values.data(), values.size());
+
+ const int BUFFER_SIZE = 10;
+ char buffer[BUFFER_SIZE];
+ std::strncpy(buffer, "VALID", BUFFER_SIZE);
+ TFE_OpSetAttrString(op, "padding", buffer, std::strlen(buffer));
+ // Overwriting value in "buffer", should be fine since TFE_Op
+ // shouldn't be holding on to it.
+ std::strncpy(buffer, "NHWC", BUFFER_SIZE);
+ TFE_OpSetAttrString(op, "data_format", buffer, std::strlen(buffer));
+
+ TFE_OpSetAttrType(op, "T", TF_FLOAT);
+
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+
+ TFE_TensorHandle* retvals[1];
+ int num_retvals = 1;
+ TFE_Execute(op, &retvals[0], &num_retvals, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ ASSERT_EQ(1, num_retvals);
+
+ tensor = TFE_TensorHandleResolve(retvals[0], status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ EXPECT_EQ(4, TF_TensorByteSize(tensor));  // Expect a single float.
+ TF_DeleteTensor(tensor);
+ TFE_DeleteTensorHandle(retvals[0]);
+
+ TFE_DeleteOp(op);
+
+ TFE_DeleteContext(ctx);
+ TF_DeleteStatus(status);
+}
+
+TEST(CAPI, TestTFE_TensorHandleCopySharingUnderlyingTensorHandle) {
+ TFE_TensorHandle* h = TestMatrixTensorHandle();
+ EXPECT_EQ(TF_FLOAT, TFE_TensorHandleDataType(h));
+
+ std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
+ TF_NewStatus(), TF_DeleteStatus);
+
+ TFE_TensorHandle* h_shares_tensor =
+ TFE_TensorHandleCopySharingTensor(h, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TF_Tensor* t = TFE_TensorHandleResolve(h_shares_tensor, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ ASSERT_EQ(16, TF_TensorByteSize(t));  // Guards the 4-float memcpy below.
+ float data[4] = {0};
+ memcpy(&data[0], TF_TensorData(t), TF_TensorByteSize(t));
+ EXPECT_EQ(1.0, data[0]);
+ EXPECT_EQ(2.0, data[1]);
+ EXPECT_EQ(3.0, data[2]);
+ EXPECT_EQ(4.0, data[3]);
+ TF_DeleteTensor(t);
+
+ TFE_DeleteTensorHandle(h);
+ TFE_DeleteTensorHandle(h_shares_tensor);
+}
} // namespace
diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 1adb0458c35193117b5fa5cfe9ceffbaaf699af7..41b5b8ff36e16100e349cb909dc79d90fa4866b0 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -29,15 +29,8 @@ limitations under the License.
namespace tensorflow {
namespace eager {
-// Information about a tensor.
-struct TapeTensor {
- int64 id; // Expected to be unique in the lifetime of this process.
- DataType dtype;
- TensorShape shape;
-};
-
// Represents an entry in the tape.
-template
+template
struct OpTapeEntry {
string op_type;
std::vector output_tensor_info;
@@ -57,8 +50,8 @@ struct OpTapeEntry {
using TensorTape = gtl::FlatMap;
// Map from operation-id to tape entry.
-template
-using OpTape = gtl::FlatMap>;
+template
+using OpTape = gtl::FlatMap>;
// Operations the tape needs to perform on tensors to do backpropagation. Named
// "vspace" because a subset of these are related to a vector space, such as
@@ -79,7 +72,7 @@ using OpTape = gtl::FlatMap>;
// TODO(apassos) provide concrete template instantiations for TFE_TensorHandle
// specialization, which is blocked by quite a few things needing to loop back
// into python now.
-template
+template
class VSpace {
public:
virtual ~VSpace() {}
@@ -93,10 +86,10 @@ class VSpace {
gtl::ArraySlice gradient_tensors) const = 0;
// Returns a tensor of the right shape and dtype filled with zeros.
- virtual Gradient* Zeros(TensorShape shape, DataType dtype) const = 0;
+ virtual Gradient* Zeros(const TapeTensor& tensor) const = 0;
// Returns a Tensor which is filled with ones and like the input.
- virtual Gradient* Ones(TensorShape shape, DataType dtype) const = 0;
+ virtual Gradient* Ones(const TapeTensor& tensor) const = 0;
// Calls the passed-in backward function.
virtual Status CallBackwardFunction(
@@ -114,7 +107,7 @@ class VSpace {
// Traces the execution of operations, doing eager garbage collection, and
// exporting a full trace so other code can do backpropagation. Not thread-safe.
-template
+template
class GradientTape {
public:
// If `persistent` is true, GradientTape will not eagerly delete backward
@@ -134,7 +127,7 @@ class GradientTape {
void Watch(int64 tensor_id);
void RecordOperation(
- const string& op_type, gtl::ArraySlice output_tensors,
+ const string& op_type, std::vector& output_tensors,
gtl::ArraySlice input_tensor_id,
gtl::ArraySlice input_dtypes,
BackwardFunction* backward_function,
@@ -146,17 +139,18 @@ class GradientTape {
// once) and produces the gradient of the target tensors with respect to the
// source tensors. The output gradients are used if not empty and not
// null. The result is populated with one tensor per target element.
- Status ComputeGradient(const VSpace& vspace,
- gtl::ArraySlice target_tensor_ids,
- gtl::ArraySlice source_tensor_id,
- gtl::ArraySlice output_gradients,
- std::vector* result);
+ Status ComputeGradient(
+ const VSpace& vspace,
+ gtl::ArraySlice target_tensor_ids,
+ gtl::ArraySlice source_tensor_id,
+ gtl::ArraySlice output_gradients,
+ std::vector* result);
bool IsPersistent() const { return persistent_; }
private:
TensorTape tensor_tape_;
- OpTape op_tape_;
+ OpTape op_tape_;
int64 next_op_id_{0};
// Map from tensor id to number of remaining usages (i.e. how many entries in
@@ -186,8 +180,8 @@ inline bool IsDtypeTrainable(DataType dtype) {
}
}
-template
-bool GradientTape::ShouldRecord(
+template
+bool GradientTape::ShouldRecord(
gtl::ArraySlice tensor_ids,
gtl::ArraySlice dtypes) {
CHECK_EQ(tensor_ids.size(), dtypes.size());
@@ -201,14 +195,15 @@ bool GradientTape::ShouldRecord(
return false;
}
-template
-void GradientTape::Watch(int64 tensor_id) {
+template
+void GradientTape::Watch(
+ int64 tensor_id) {
tensor_tape_.emplace(tensor_id, -1);
}
-template
-void GradientTape::RecordOperation(
- const string& op_type, gtl::ArraySlice output_tensors,
+template
+void GradientTape::RecordOperation(
+ const string& op_type, std::vector& output_tensors,
gtl::ArraySlice input_tensor_id,
gtl::ArraySlice input_dtypes,
BackwardFunction* backward_function,
@@ -229,16 +224,18 @@ void GradientTape::RecordOperation(
for (const TapeTensor& o : output_tensors) {
// Note: the tensor can have already been watched and hence be in the tape,
// so we cannot check that we're inserting it here.
- tensor_tape_[o.id] = op_id;
- tensor_usage_[o.id] = 1;
+ tensor_tape_[o.GetID()] = op_id;
+ tensor_usage_[o.GetID()] = 1;
tensors.push_back(o);
}
- op_tape_[op_id] = OpTapeEntry{
- op_type, tensors, ids, backward_function, backward_function_deleter};
+ op_tape_[op_id] = OpTapeEntry{
+ op_type, std::move(tensors), ids, backward_function,
+ backward_function_deleter};
}
-template
-void GradientTape::DeleteTrace(int64 tensor_id) {
+template
+void GradientTape::DeleteTrace(
+ int64 tensor_id) {
auto it = tensor_usage_.find(tensor_id);
if (it == tensor_usage_.end()) {
return;
@@ -261,7 +258,7 @@ void GradientTape::DeleteTrace(int64 tensor_id) {
auto op_it = op_tape_.find(op_id);
CHECK(op_it != op_tape_.end());
for (const auto& output : op_it->second.output_tensor_info) {
- if (tensor_usage_.find(output.id) != tensor_usage_.end()) {
+ if (tensor_usage_.find(output.GetID()) != tensor_usage_.end()) {
// Found a usage for an output, so cannot delete the op.
return;
}
@@ -304,9 +301,9 @@ void GradientTape::DeleteTrace(int64 tensor_id) {
namespace {
-template
+template
struct BackpropInitialState {
- OpTape op_tape;
+ OpTape op_tape;
// Map from tensor ID to how many references still exist for this tensor in
// the tape.
@@ -322,17 +319,17 @@ struct BackpropInitialState {
// If `persistent_tape` is false, op_tape is cleared and backwards functions
// not needed for gradient computation are deleted. Backwards functions that
// are needed, are copied and returned in BackpropInitialState.
-template
-BackpropInitialState PrepareBackprop(
+template
+BackpropInitialState PrepareBackprop(
gtl::ArraySlice target, const TensorTape& tensor_tape,
- OpTape* op_tape, const gtl::FlatSet& sources_set,
- bool persistent_tape) {
+ OpTape* op_tape,
+ const gtl::FlatSet& sources_set, bool persistent_tape) {
std::vector tensor_stack;
tensor_stack.reserve(target.size());
for (auto t : target) {
tensor_stack.push_back(t);
}
- BackpropInitialState result;
+ BackpropInitialState result;
while (!tensor_stack.empty()) {
int64 tensor_id = tensor_stack.back();
tensor_stack.pop_back();
@@ -383,9 +380,9 @@ BackpropInitialState PrepareBackprop(
return result;
}
-template
+template
std::vector InitialStack(
- const OpTape& op_tape,
+ const OpTape& op_tape,
const gtl::FlatMap& op_missing_tensor) {
std::vector result;
for (auto& op_entry : op_tape) {
@@ -396,13 +393,13 @@ std::vector InitialStack(
return result;
}
-template
-Status InitialGradients(const VSpace& vspace,
- gtl::ArraySlice target_tensor_ids,
- gtl::ArraySlice output_gradients,
- const TensorTape& tensor_tape,
- const OpTape& op_tape,
- gtl::FlatMap>* result) {
+template
+Status InitialGradients(
+ const VSpace& vspace,
+ gtl::ArraySlice target_tensor_ids,
+ gtl::ArraySlice output_gradients, const TensorTape& tensor_tape,
+ const OpTape& op_tape,
+ gtl::FlatMap>* result) {
for (int i = 0; i < target_tensor_ids.size(); ++i) {
const int64 id = target_tensor_ids[i];
if (output_gradients.empty() || output_gradients[i] == nullptr) {
@@ -416,11 +413,10 @@ Status InitialGradients(const VSpace& vspace,
}
bool found = false;
for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) {
- if (op_it->second.output_tensor_info[j].id == id) {
+ if (op_it->second.output_tensor_info[j].GetID() == id) {
found = true;
(*result)[id].push_back(
- vspace.Ones(op_it->second.output_tensor_info[j].shape,
- op_it->second.output_tensor_info[j].dtype));
+ vspace.Ones(op_it->second.output_tensor_info[j]));
break;
}
}
@@ -440,6 +436,27 @@ Status InitialGradients(const VSpace& vspace,
return Status::OK();
}
+// TODO(agarwal): use an automatic mechanism for handling None arguments to
+// gradient functions.
+//
+// Some gradient functions can accept None arguments for gradients. The
+// following maps the operation name to the indices at which the corresponding
+// gradient function can accept None values. E.g., FusedBatchNorm outputs 5
+// values and hence receives 5 gradient values during backprop. However the
+// gradient function uses only the first of those values and ignores the rest.
+// The entry, "FusedBatchNorm": [1, 2, 3, 4], indicates that only the gradient
+// corresponding to index 0 is used, and the gradient values at indices 1-4 are
+// ignored (and hence can be None). The backprop algorithm can then leverage
+// this by not constructing zeros to pass for those indices.
+gtl::FlatMap>* FunctionsAcceptingNoneForIndicesMap() {
+ static auto* const m = new gtl::FlatMap>({
+ {"SoftmaxCrossEntropyWithLogits", {1}},
+ {"SparseSoftmaxCrossEntropyWithLogits", {1}},
+ {"FusedBatchNorm", {1, 2, 3, 4}},
+ });
+ return m;
+}
+
} // namespace
// If over kMinAggregateCount gradients are accumulated and the total
@@ -448,16 +465,16 @@ Status InitialGradients(const VSpace& vspace,
constexpr int kMinAggregateCount = 4;
constexpr int kMinAggregateBytes = 128 * 1024 * 1024;
-template
-Status GradientTape::ComputeGradient(
- const VSpace& vspace,
+template
+Status GradientTape::ComputeGradient(
+ const VSpace& vspace,
gtl::ArraySlice target_tensor_ids,
gtl::ArraySlice source_tensor_ids,
gtl::ArraySlice output_gradients,
std::vector* result) {
gtl::FlatSet sources_set(source_tensor_ids.begin(),
source_tensor_ids.end());
- BackpropInitialState state = PrepareBackprop(
+ BackpropInitialState state = PrepareBackprop(
target_tensor_ids, tensor_tape_, &op_tape_, sources_set, persistent_);
std::vector op_stack =
InitialStack(state.op_tape, state.op_missing_tensor);
@@ -485,10 +502,6 @@ Status GradientTape::ComputeGradient(
VLOG(1) << " " << t;
}
}
- gtl::FlatMap> functions_accept_none_for_indices({
- {"SoftmaxCrossEntropyWithLogits", {1}},
- {"FusedBatchNorm", {1, 2, 3, 4}},
- });
while (!op_stack.empty()) {
const int64 op = op_stack.back();
VLOG(1) << "Popped " << op;
@@ -505,18 +518,16 @@ Status GradientTape::ComputeGradient(
out_gradients.reserve(trace.output_tensor_info.size());
bool any_gradient_nonzero = false;
for (int i = 0; i < trace.output_tensor_info.size(); ++i) {
- const int64 id = trace.output_tensor_info[i].id;
+ const int64 id = trace.output_tensor_info[i].GetID();
auto grad_it = gradients.find(id);
if (grad_it == gradients.end()) {
auto func_name_it =
- functions_accept_none_for_indices.find(trace.op_type);
- if (func_name_it != functions_accept_none_for_indices.end() &&
+ FunctionsAcceptingNoneForIndicesMap()->find(trace.op_type);
+ if (func_name_it != FunctionsAcceptingNoneForIndicesMap()->end() &&
func_name_it->second.find(i) != func_name_it->second.end()) {
out_gradients.push_back(nullptr);
} else {
- out_gradients.push_back(
- vspace.Zeros(trace.output_tensor_info[i].shape,
- trace.output_tensor_info[i].dtype));
+ out_gradients.push_back(vspace.Zeros(trace.output_tensor_info[i]));
}
} else {
any_gradient_nonzero = true;
diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc
index 8486b585c8587e18e8eea18a893fac0a40ff4a27..247236b760dd8c07bbb08426100b6a4d34296d2e 100644
--- a/tensorflow/c/python_api.cc
+++ b/tensorflow/c/python_api.cc
@@ -110,7 +110,7 @@ void ExtendSession(TF_Session* session, TF_Status* status) {
session->extend_before_run = false;
}
-std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) {
+std::string GetHandleShapeAndType(TF_Graph* graph, TF_Output output) {
Node* node = &output.oper->node;
CppShapeInferenceResult::HandleData handle_data;
handle_data.set_is_set(true);
@@ -135,9 +135,8 @@ std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) {
return result;
}
-void SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output,
- const void* proto, size_t proto_len,
- TF_Status* status) {
+void SetHandleShapeAndType(TF_Graph* graph, TF_Output output, const void* proto,
+ size_t proto_len, TF_Status* status) {
tensorflow::CppShapeInferenceResult::HandleData handle_data;
if (!handle_data.ParseFromArray(proto, proto_len)) {
status->status = tensorflow::errors::InvalidArgument(
diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h
index 4bcb5bde62c8a4df4e68c1ce0daaf459434ceb5d..5cce84020bc68d912d259f51512341eb5f464a2c 100644
--- a/tensorflow/c/python_api.h
+++ b/tensorflow/c/python_api.h
@@ -54,16 +54,17 @@ void SetRequireShapeInferenceFns(TF_Graph* graph, bool require);
void ExtendSession(TF_Session* session, TF_Status* status);
// Returns the serialized CppShapeInferenceResult::HandleData proto for
-// `output` if its a resource tensor, or otherwise returns the empty string.
-std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output);
+// `output` if it's a resource or variant tensor, or otherwise returns the empty
+// string.
+std::string GetHandleShapeAndType(TF_Graph* graph, TF_Output output);
// Sets `output` based on `proto`, which should be a serialized
-// CppShapeInferenceResult::HandleData proto.
+// CppShapeInferenceResult::HandleData proto. `output` should be a resource
+// or variant tensor.
// NOTE(skyewm): `proto` is passed a void*/size_t pair instead of a std::string
// because I couldn't get SWIG to work otherwise.
-void SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output,
- const void* proto, size_t proto_len,
- TF_Status* status);
+void SetHandleShapeAndType(TF_Graph* graph, TF_Output output, const void* proto,
+ size_t proto_len, TF_Status* status);
} // namespace tensorflow
#endif // TENSORFLOW_C_PYTHON_API_H_
diff --git a/tensorflow/c/tf_status_helper.h b/tensorflow/c/tf_status_helper.h
index 86e687df205617018d94c19ac34fdc3bf54dcc6f..7661a01de4afcefbb66b33a05534e22d2ba1baa0 100644
--- a/tensorflow/c/tf_status_helper.h
+++ b/tensorflow/c/tf_status_helper.h
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
-#ifndef TENSORFLOW_C_TF_STATUS_HELPER_H
-#define TENSORFLOW_C_TF_STATUS_HELPER_H
+#ifndef TENSORFLOW_C_TF_STATUS_HELPER_H_
+#define TENSORFLOW_C_TF_STATUS_HELPER_H_
#include "tensorflow/c/c_api.h"
#include "tensorflow/core/lib/core/status.h"
@@ -29,4 +29,4 @@ Status StatusFromTF_Status(const TF_Status* tf_status);
} // namespace tensorflow
-#endif // TENSORFLOW_C_TF_STATUS_HELPER_H
+#endif // TENSORFLOW_C_TF_STATUS_HELPER_H_
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index 588a45ea43f90c4d9b3d04fea305d2c562ae1d72..b587e63227708427e7fae47f8f4a7b524d963ed9 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -10,11 +10,12 @@ licenses(["notice"]) # Apache 2.0
load(
"//tensorflow:tensorflow.bzl",
- "tf_cc_test",
+ "cc_library_with_android_deps",
"tf_cc_binary",
+ "tf_cc_test",
"tf_copts",
"tf_gen_op_wrappers_cc",
- "cc_library_with_android_deps",
+ "transitive_hdrs",
)
cc_library(
@@ -379,9 +380,11 @@ tf_cc_test(
srcs = ["gradients/math_grad_test.cc"],
deps = [
":cc_ops",
+ ":client_session",
":grad_op_registry",
":grad_testutil",
":gradient_checker",
+ ":gradients",
":math_grad",
":testutil",
"//tensorflow/core:lib_internal",
@@ -626,7 +629,6 @@ tf_cc_binary(
copts = tf_copts(),
linkopts = select({
"//tensorflow:windows": [],
- "//tensorflow:windows_msvc": [],
"//tensorflow:darwin": [
"-lm",
"-lpthread",
@@ -715,3 +717,26 @@ tf_cc_test(
"//tensorflow/core:testlib",
],
)
+
+transitive_hdrs(
+ name = "headers",
+ visibility = ["//tensorflow:__subpackages__"],
+ deps = [
+ ":cc_ops",
+ ":client_session",
+ ":coordinator",
+ ":gradient_checker",
+ ":gradients",
+ ":ops",
+ ":queue_runner",
+ ":remote_fused_graph_ops",
+ ":scope",
+ "//tensorflow/cc/profiler",
+ "//tensorflow/cc/saved_model:constants",
+ "//tensorflow/cc/saved_model:loader",
+ "//tensorflow/cc/saved_model:reader",
+ "//tensorflow/cc/saved_model:signature_constants",
+ "//tensorflow/cc/saved_model:tag_constants",
+ "//tensorflow/cc/tools:freeze_saved_model",
+ ],
+)
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index dfdef88945deca376368edd6f7aa322b1e1cbf94..a32d1b1eb50fc715084f5ee663a732770db1883c 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -466,7 +466,7 @@ string AvoidCPPKeywords(StringPiece name) {
if (IsCPPKeyword(name)) {
return strings::StrCat(name, "_");
}
- return std::string(name);
+ return string(name);
}
void InferArgAttributes(const OpDef::ArgDef& arg,
@@ -508,15 +508,6 @@ bool HasOptionalAttrs(
return false;
}
-const ApiDef::Arg* FindInputArg(StringPiece name, const ApiDef& api_def) {
- for (int i = 0; i < api_def.in_arg_size(); ++i) {
- if (api_def.in_arg(i).name() == name) {
- return &api_def.in_arg(i);
- }
- }
- return nullptr;
-}
-
struct OpInfo {
// graph_op_def: The OpDef used by the runtime, has the names that
// must be used when calling NodeBuilder.
diff --git a/tensorflow/cc/framework/ops.h b/tensorflow/cc/framework/ops.h
index a085e1d6e2de5ad63d11eb8979ae64c26b91366f..0717e7dd4b358d6c212070374bcc3fd2f91ed0ab 100644
--- a/tensorflow/cc/framework/ops.h
+++ b/tensorflow/cc/framework/ops.h
@@ -150,7 +150,7 @@ class Input {
Initializer(const std::initializer_list& v, const TensorShape& shape) {
typedef typename RealType::type RealT;
Tensor t(DataTypeToEnum::v(), shape);
- if (t.NumElements() != v.size()) {
+ if (t.NumElements() != static_cast(v.size())) {
status = errors::InvalidArgument(
"Cannot construct a tensor with ", t.NumElements(),
" from an initializer list with ", v.size(), " elements");
diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc
index 8c886f31711eb014fb9e9d600c9c78cf22073f71..7f6ac4cae78d8d6e118837fce9ae5270336cdc89 100644
--- a/tensorflow/cc/framework/scope.cc
+++ b/tensorflow/cc/framework/scope.cc
@@ -225,7 +225,7 @@ std::unordered_set Scope::Impl::GetColocationConstraints(
for (const string& entry : node_constraints) {
StringPiece s(entry);
if (str_util::ConsumePrefix(&s, kColocationGroupPrefix)) {
- current_constraints.insert(std::string(s));
+ current_constraints.emplace(s);
}
}
} else {
diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc
index b353accddcb6db9a07c112de03ead2f02c4ee6a6..e9173227aadbf86eab666e6c17bacacb92888572 100644
--- a/tensorflow/cc/gradients/array_grad.cc
+++ b/tensorflow/cc/gradients/array_grad.cc
@@ -120,6 +120,24 @@ Status SplitGrad(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("Split", SplitGrad);
+Status FillGrad(const Scope& scope, const Operation& op,
+ const std::vector