diff --git a/tools/bazel.rc b/.bazelrc
similarity index 69%
rename from tools/bazel.rc
rename to .bazelrc
index 660e3d328038b618fefdf96d60863941d3a46edd..1945078789dcd48603ceb322c34ab2cd5af5eb59 100644
--- a/tools/bazel.rc
+++ b/.bazelrc
@@ -24,19 +24,28 @@ build --define framework_shared_object=true
# Please note that MKL on MacOS or windows is still not supported.
# If you would like to use a local MKL instead of downloading, please set the
# environment variable "TF_MKL_ROOT" every time before build.
-build:mkl --define=using_mkl=true
+build:mkl --define=build_with_mkl=true --define=enable_mkl=true
build:mkl -c opt
# This config option is used to enable MKL-DNN open source library only,
# without depending on MKL binary version.
-build:mkl_open_source_only --define=using_mkl_dnn_only=true
+build:mkl_open_source_only --define=build_with_mkl_dnn_only=true
+build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true
build:download_clang --crosstool_top=@local_config_download_clang//:toolchain
build:download_clang --define=using_clang=true
+# Instruct clang to use LLD for linking.
+# This only works with GPU builds currently, since Bazel sets -B/usr/bin in
+# auto-generated CPU crosstool, forcing /usr/bin/ld.lld to be preferred over
+# the downloaded one.
+build:download_clang_use_lld --linkopt='-fuse-ld=lld'
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true
+build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain
+build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true
+
build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true
@@ -52,6 +61,18 @@ build:sycl_asan --define=using_sycl=true --define=using_trisycl=false --copt -fn
build:sycl_trisycl --crosstool_top=@local_config_sycl//crosstool:toolchain
build:sycl_trisycl --define=using_sycl=true --define=using_trisycl=true
+# Options extracted from configure script
+build:gdr --define=with_gdr_support=true
+build:ngraph --define=with_ngraph_support=true
+build:verbs --define=with_verbs_support=true
+
+# Options to disable features that are enabled by default
+build:noaws --define=no_aws_support=true
+build:nogcp --define=no_gcp_support=true
+build:nohdfs --define=no_hdfs_support=true
+build:nokafka --define=no_kafka_support=true
+build:noignite --define=no_ignite_support=true
+
build --define=use_fast_cpp_protos=true
build --define=allow_oversize_protos=true
build --define=grpc_no_ares=true
@@ -60,5 +81,15 @@ build --spawn_strategy=standalone
build --genrule_strategy=standalone
build -c opt
+# Other build flags.
+build --define=grpc_no_ares=true
+
# Modular TF build options
build:dynamic_kernels --define=dynamic_loaded_kernels=true
+
+# Default paths for TF_SYSTEM_LIBS
+build --define=PREFIX=/usr
+build --define=LIBDIR=$(PREFIX)/lib
+build --define=INCLUDEDIR=$(PREFIX)/include
+
+# Do not commit the tf_configure.bazelrc import line that configure.py appends to this file
diff --git a/.gitignore b/.gitignore
index 1709610fcd3b46910d703fe7244980e3dd2c2521..cb65f447d4a551266e237714a16d71b58bcfc51d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
.DS_Store
.ipynb_checkpoints
node_modules
-/.bazelrc
/.tf_configure.bazelrc
/bazel-*
/bazel_pip
@@ -14,6 +13,7 @@ __pycache__
*.swp
.vscode/
cmake_build/
+tensorflow/contrib/cmake/_build/
.idea/**
/build/
[Bb]uild/
diff --git a/CODEOWNERS b/CODEOWNERS
index b9f0313cc6d59d3fbdcd014e1a528126d863075a..94cc865479cd6ab5cdb589490d3a2d650f06b160 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -1,53 +1,67 @@
-# NOTE: Disabled temporarily because it's too noisy on pushes.
# Where component owners are known, add them here.
-# /tensorflow/core/platform/windows/ @mrry
-# /tensorflow/java/ @asimshankar
-# /tensorflow/tensorboard/ @jart @dandelionmane
-# /tensorflow/tools/docs/ @markdaoust
+/tensorflow/core/debug @caisq
+/tensorflow/core/platform/windows/ @mrry
+/tensorflow/core/platform/s3 @yongtang
+/tensorflow/go @asimshankar
+/tensorflow/java/ @asimshankar
+/tensorflow/python/debug @caisq
+/tensorflow/python/tools/api/generator/ @annarev
+/tensorflow/tensorboard/ @jart
+/tensorflow/tools/docs/ @markdaoust
# contrib
-# NEED OWNER: /tensorflow/contrib/avro/
-# /tensorflow/contrib/batching/ @alextp @chrisolston
-# /tensorflow/contrib/bayesflow/ @ebrevdo @rsepassi @jvdillon
-# /tensorflow/contrib/boosted_trees/ @sshrdp @yk5 @nataliaponomareva
-# /tensorflow/contrib/cmake/ @mrry @benoitsteiner
-# /tensorflow/contrib/copy_graph/ @tucker @poxvoculi
-# /tensorflow/contrib/crf/ @kentonl
-# /tensorflow/contrib/data/ @mrry
-# /tensorflow/contrib/distributions/ @jvdillon @langmore @rsepassi
-# /tensorflow/contrib/factorization/ @agarwal-ashish @xavigonzalvo
-# /tensorflow/contrib/ffmpeg/ @fredbertsch
-# NEED OWNER: /tensorflow/contrib/framework/
-# /tensorflow/contrib/graph_editor/ @purpledog
+# NEED OWNER: /tensorflow/contrib/all_reduce
+/tensorflow/contrib/batching/ @alextp @chrisolston
+/tensorflow/contrib/bayesflow/ @ebrevdo @rsepassi @jvdillon
+/tensorflow/contrib/boosted_trees/ @sshrdp @yk5 @nataliaponomareva
+/tensorflow/contrib/checkpoint/ @allenlavoie
+/tensorflow/contrib/cluster_resolver/ @frankchn
+/tensorflow/contrib/cmake/ @mrry
+/tensorflow/contrib/copy_graph/ @tucker @poxvoculi
+/tensorflow/contrib/crf/ @kentonl
+/tensorflow/contrib/data/ @mrry
+/tensorflow/contrib/distribute @joshl @priyag @sourabhbajaj @frankchn
+/tensorflow/contrib/distributions/ @jvdillon @langmore @rsepassi
+/tensorflow/contrib/eager @alextp @asimshankar
+/tensorflow/contrib/factorization/ @agarwal-ashish @xavigonzalvo
+/tensorflow/contrib/ffmpeg/ @fredbertsch
+/tensorflow/contrib/framework/ @ebrevdo
+/tensorflow/contrib/gan/ @joel-shor
+/tensorflow/contrib/graph_editor/ @purpledog
# NEED OWNER: /tensorflow/contrib/grid_rnn/
-# /tensorflow/contrib/hvx/ @satok16
-# /tensorflow/contrib/integrate/ @shoyer
-# /tensorflow/contrib/kernel_methods/ @petrosmol
-# /tensorflow/contrib/ios_examples/ @petewarden
-# /tensorflow/contrib/labeled_tensor/ @shoyer
-# /tensorflow/contrib/layers/ @fchollet @martinwicke
-# /tensorflow/contrib/learn/ @martinwicke @ispirmustafa @alextp
-# /tensorflow/contrib/linalg/ @langmore
-# /tensorflow/contrib/linear_optimizer/ @petrosmol @andreasst @katsiapis
-# /tensorflow/contrib/lookup/ @ysuematsu @andreasst
-# /tensorflow/contrib/losses/ @alextp @ispirmustafa
-# /tensorflow/contrib/makefile/ @petewarden @satok16 @wolffg
-# /tensorflow/contrib/metrics/ @alextp @honkentuber @ispirmustafa
-# /tensorflow/contrib/nccl/ @cwhipkey @zheng-xq
-# /tensorflow/contrib/opt/ @strategist333
-# /tensorflow/contrib/pi_examples/ @maciekcc
-# /tensorflow/contrib/quantization/ @petewarden @cwhipkey @keveman
-# /tensorflow/contrib/rnn/ @ebrevdo
-# /tensorflow/contrib/saved_model/ @nfiedel @sukritiramesh
-# /tensorflow/contrib/seq2seq/ @lukaszkaiser
-# /tensorflow/contrib/session_bundle/ @nfiedel @sukritiramesh
-# /tensorflow/contrib/slim/ @sguada @thenbasilmanran
-# /tensorflow/contrib/stateless/ @girving
-# /tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst @yupbank
-# /tensorflow/contrib/testing/ @dandelionmane
-# /tensorflow/contrib/timeseries/ @allenlavoie
-# /tensorflow/contrib/tpu/ @frankchn @saeta @jhseu
-# /tensorflow/contrib/training/ @joel-shor @ebrevdo
-# /tensorflow/contrib/util/ @sherrym
+/tensorflow/contrib/hadoop @yongtang
+/tensorflow/contrib/hvx/ @satok16
+/tensorflow/contrib/integrate/ @shoyer
+/tensorflow/contrib/kafka @yongtang
+/tensorflow/contrib/kernel_methods/ @petrosmol
+/tensorflow/contrib/kinesis @yongtang
+/tensorflow/contrib/ios_examples/ @petewarden
+/tensorflow/contrib/labeled_tensor/ @shoyer
+/tensorflow/contrib/layers/ @fchollet @martinwicke
+/tensorflow/contrib/learn/ @martinwicke @ispirmustafa @alextp
+/tensorflow/contrib/linear_optimizer/ @petrosmol @andreasst @katsiapis
+/tensorflow/contrib/lookup/ @ysuematsu @andreasst
+/tensorflow/contrib/losses/ @alextp @ispirmustafa
+/tensorflow/contrib/makefile/ @petewarden @satok16 @wolffg
+/tensorflow/contrib/metrics/ @alextp @honkentuber @ispirmustafa
+/tensorflow/contrib/nccl/ @cwhipkey @zheng-xq
+/tensorflow/contrib/opt/ @strategist333 @alextp
+/tensorflow/contrib/pi_examples/ @maciekcc
+/tensorflow/contrib/quantization/ @petewarden
+/tensorflow/contrib/rnn/ @ebrevdo @scottzhu
+/tensorflow/contrib/saved_model/ @nfiedel @sukritiramesh @allenl
+/tensorflow/contrib/seq2seq/ @ebrevdo @lmthang
+/tensorflow/contrib/session_bundle/ @nfiedel @sukritiramesh
+/tensorflow/contrib/slim/ @sguada @thenbasilmanran
+/tensorflow/contrib/stateless/ @girving @alextp
+/tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst @yupbank
+/tensorflow/contrib/tensorrt/ @aaroey
+# NEED OWNER: /tensorflow/contrib/testing/
+/tensorflow/contrib/timeseries/ @allenlavoie
+/tensorflow/contrib/tpu/ @frankchn @saeta @jhseu @sourabhbajaj
+/tensorflow/contrib/training/ @joel-shor @ebrevdo
+/tensorflow/contrib/util/ @sherrym
+
+/third_party/systemlibs/ @perfinion
diff --git a/README.md b/README.md
index 823c6880967a29f3e4838f7c120961c1b16e2b5f..c3455474260b2db56f1f585b70af9c259704d01a 100644
--- a/README.md
+++ b/README.md
@@ -29,7 +29,21 @@ subscribing to
[announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce).
## Installation
-*See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.*
+
+To install the current release for CPU-only:
+
+```
+pip install tensorflow
+```
+
+Use the GPU package for CUDA-enabled GPU cards:
+
+```
+pip install tensorflow-gpu
+```
+
+*See [Installing TensorFlow](https://www.tensorflow.org/install) for detailed
+instructions, and how to build from source.*
People who are a little more adventurous can also try our nightly binaries:
@@ -48,15 +62,12 @@ $ python
```
```python
>>> import tensorflow as tf
+>>> tf.enable_eager_execution()
+>>> tf.add(1, 2)
+3
>>> hello = tf.constant('Hello, TensorFlow!')
->>> sess = tf.Session()
->>> sess.run(hello)
+>>> hello.numpy()
'Hello, TensorFlow!'
->>> a = tf.constant(10)
->>> b = tf.constant(32)
->>> sess.run(a + b)
-42
->>> sess.close()
```
Learn more examples about how to do specific tasks in TensorFlow at the [tutorials page of tensorflow.org](https://www.tensorflow.org/tutorials/).
@@ -90,27 +101,29 @@ The TensorFlow project strives to abide by generally accepted best practices in
| **Windows CPU** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-cpu.html) | [pypi](https://pypi.org/project/tf-nightly/) |
| **Windows GPU** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/windows-gpu.html) | [pypi](https://pypi.org/project/tf-nightly-gpu/) |
| **Android** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/android.html) | [](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) |
+| **Raspberry Pi 0 and 1** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi01-py2.html) [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi01-py3.html) | [Py2](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp27-none-linux_armv6l.whl) [Py3](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp34-none-linux_armv6l.whl) |
+| **Raspberry Pi 2 and 3** | [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi23-py2.html) [](https://storage.googleapis.com/tensorflow-kokoro-build-badges/rpi23-py3.html) | [Py2](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp27-none-linux_armv7l.whl) [Py3](https://storage.googleapis.com/tensorflow-nightly/tensorflow-1.10.0-cp34-none-linux_armv7l.whl) |
### Community Supported Builds
-| Build Type | Status | Artifacts |
-| --- | --- | --- |
-| **IBM s390x** | [](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA |
-| **IBM ppc64le CPU** | [](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA |
-| **IBM ppc64le GPU** | [](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/) | TBA |
-| **Linux CPU with Intel® MKL-DNN** Nightly | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) |
-| **Linux CPU with Intel® MKL-DNN** Python 2.7
**Linux CPU with Intel® MKL-DNN** Python 3.5
**Linux CPU with Intel® MKL-DNN** Python 3.6 | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild)|[1.10.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp27-cp27mu-linux_x86_64.whl)
[1.10.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp35-cp35m-linux_x86_64.whl)
[1.10.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp36-cp36m-linux_x86_64.whl) |
-
+Build Type | Status | Artifacts
+---------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------
+**IBM s390x** | [](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA
+**IBM ppc64le CPU** | [](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA
+**IBM ppc64le GPU** Nightly | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/)
+**IBM ppc64le GPU** Stable Release | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) | [Release](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/)
+**Linux CPU with Intel® MKL-DNN** Nightly | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/)
+**Linux CPU with Intel® MKL-DNN** Python 2.7
**Linux CPU with Intel® MKL-DNN** Python 3.5
**Linux CPU with Intel® MKL-DNN** Python 3.6 | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild) | [1.10.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp27-cp27mu-linux_x86_64.whl)
[1.10.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp35-cp35m-linux_x86_64.whl)
[1.10.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp36-cp36m-linux_x86_64.whl)
## For more information
-* [Tensorflow Blog](https://medium.com/tensorflow)
-* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si)
+* [TensorFlow Website](https://www.tensorflow.org)
+* [TensorFlow Tutorials](https://www.tensorflow.org/tutorials/)
* [TensorFlow Model Zoo](https://github.com/tensorflow/models)
-* [TensorFlow MOOC on Udacity](https://www.udacity.com/course/deep-learning--ud730)
+* [TensorFlow Twitter](https://twitter.com/tensorflow)
+* [TensorFlow Blog](https://medium.com/tensorflow)
+* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si)
* [TensorFlow Roadmap](https://www.tensorflow.org/community/roadmap)
-* [Tensorflow Twitter](https://twitter.com/tensorflow)
-* [TensorFlow Website](https://www.tensorflow.org)
* [TensorFlow White Papers](https://www.tensorflow.org/about/bib)
* [TensorFlow YouTube Channel](https://www.youtube.com/channel/UC0rqucBdTuFTjJiefW5t-IQ)
diff --git a/RELEASE.md b/RELEASE.md
index 763ef3b279dde209ed387534032deae40a33a9e4..20e1d9217b7684e696d0abf427eef9ab9548d1b7 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,86 @@
+# Release 1.11.0
+
+## Major Features and Improvements
+
+* Nvidia GPU:
+ * Prebuilt binaries are now (as of TensorFlow 1.11) built against cuDNN 7.2 and TensorRT 4. See updated install guides: [Installing TensorFlow on Ubuntu](https://www.tensorflow.org/install/install_linux#tensorflow_gpu_support)
+* Google Cloud TPU:
+ * Experimental tf.data integration for Keras on Google Cloud TPUs.
+ * Experimental / preview support for eager execution on Google Cloud TPUs.
+* DistributionStrategy:
+ * Add multi-GPU DistributionStrategy support in tf.keras. Users can now use `fit`, `evaluate` and `predict` to distribute their model on multiple GPUs.
+ * Add multi-worker DistributionStrategy and standalone client support in Estimator. See [README](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/distribute) for more details.
+* Add C, C++, and Python functions for querying kernels
+
+## Breaking Changes
+
+* Keras:
+ * The default values for tf.keras `RandomUniform`, `RandomNormal`, and `TruncatedNormal` initializers have been changed to match those in external Keras.
+ * Breaking change: `model.get_config()` on a Sequential model now returns a config dictionary (consistent with other Model instances) instead of a list of configs for the underlying layers.
+
+## Bug Fixes and Other Changes
+
+* C++:
+ * Changed the signature of SessionFactory::NewSession so that it can return a meaningful error message on failure.
+* tf.data:
+ * Remove `num_parallel_parser_calls` argument from `tf.contrib.data.make_csv_dataset()`.
+ * `tf.data.Dataset.list_files()` raises an exception at initialization time if the argument matches no files.
+ * Renamed BigTable class to BigtableTable for clarity
+ * Document use of the Cloud Bigtable API
+ * Adding `tf.contrib.data.reduce_dataset` which can be used to reduce a dataset to a single element.
+ * Generalization of `tf.contrib.data.sliding_window_batch`.
+* INC:
+ * Runtime improvements to triangular solve.
+* `tf.contrib`:
+ * Add an `implementation` argument to `tf.keras.layers.LocallyConnected2D` and `tf.keras.layers.LocallyConnected1D`. The new mode (`implementation=2`) performs the forward pass as a single dense matrix multiplication, allowing dramatic speedups in certain scenarios (but worse performance in others - see docstring). The option also allows the use of `padding=same`.
+ * Add documentation clarifying the differences between tf.fill and tf.constant.
+ * Add experimental IndexedDatasets.
+ * Add selective registration target using the lite proto runtime.
+ * Add simple Tensor and DataType classes to TensorFlow Lite Java
+ * Add support for bitcasting to/from uint32 and uint64.
+ * Added a subclass of Estimator that can be created from a SavedModel (SavedModelEstimator).
+ * Adds leaf index modes as an argument.
+ * Allow a different output shape from the input in tf.contrib.image.transform.
+ * Change the state_size order of the StackedRNNCell to be natural order. To keep the existing behavior, user can add reverse_state_order=True when constructing the StackedRNNCells.
+ * Deprecate self.test_session() in favor of self.session() or self.cached_session().
+ * Directly import tensor.proto.h (the transitive import will be removed from tensor.h soon)
+ * Estimator.train() now supports tf.contrib.summary.\* summaries out of the box; each call to .train() will now create a separate tfevents file rather than re-using a shared one.
+ * Fix FTRL L2-shrinkage behavior: the gradient from the L2 shrinkage term should not end up in the accumulator.
+ * Fix toco compilation/execution on Windows
+ * GoogleZoneProvider class added to detect which Google Cloud Engine zone tensorflow is running in.
+ * It is now safe to call any of the C API's TF_Delete\* functions on nullptr
+ * Log some errors on Android to logcat
+ * Match FakeQuant numerics in TFLite to improve accuracy of TFLite quantized inference models.
+ * Optional bucket location check for the GCS Filesystem.
+ * Performance enhancements for StringSplitOp & StringSplitV2Op.
+ * Performance improvements for regex replace operations.
+ * TFRecordWriter now raises an error if .write() fails.
+ * TPU: More helpful error messages in TPUClusterResolvers.
+ * The legacy_init_op argument to SavedModelBuilder methods for adding MetaGraphs has been deprecated. Please use the equivalent main_op argument instead. As part of this, we now explicitly check for a single main_op or legacy_init_op at the time of SavedModel building, whereas the check on main_op was previously only done at load time.
+ * The protocol used for Estimator training is now configurable in RunConfig.
+ * Triangular solve performance improvements.
+ * Unify RNN cell interface between TF and Keras. Add new get_initial_state() to Keras and TF RNN cell, which will be used to replace the existing zero_state() method.
+ * Update initialization of variables in Keras.
+ * Updates to "constrained_optimization" in tensorflow/contrib.
+ * boosted trees: adding pruning mode
+ * tf.train.Checkpoint does not delete old checkpoints by default.
+ * tfdbg: Limit the total disk space occupied by dumped tensor data to 100 GBytes. Add environment variable `TFDBG_DISK_BYTES_LIMIT` to allow adjustment of this upper limit.
+
+## Thanks to our Contributors
+
+This release contains contributions from many people at Google, as well as:
+
+Aapeli, adoda, Ag Ramesh, Amogh Mannekote, Andrew Gibiansky, Andy Craze, Anirudh Koul, Aurelien Geron, Avijit, Avijit-Nervana, Ben, Benjamin H. Myara, bhack, Brett Koonce, Cao Zongyan, cbockman, cheerss, Chikanaga Tomoyuki, Clayne Robison, cosine0, Cui Wei, Dan J, David, David Norman, Dmitry Klimenkov, Eliel Hojman, Florian Courtial, fo40225, formath, Geoffrey Irving, gracehoney, Grzegorz Pawelczak, Guoliang Hua, Guozhong Zhuang, Herman Zvonimir Došilović, HuiyangFei, Jacker, Jan Hünnemeyer, Jason Taylor, Jason Zaman, Jesse, Jiang,Zhoulong, Jiawei Zhang, Jie, Joe Yearsley, Johannes Schmitz, Jon Perl, Jon Triebenbach, Jonathan, Jonathan Hseu, Jongmin Park, Justin Shenk, karl@kubx.ca, Kate Hodesdon, Kb Sriram, Keishi Hattori, Kenneth Blomqvist, Koan-Sin Tan, Li Liangbin, Li, Yiqiang, Loo Rong Jie, Madiyar, Mahmoud Abuzaina, Mark Ryan, Matt Dodge, mbhuiyan, melvinljy96, Miguel Mota, Nafis Sadat, Nathan Luehr, naurril, Nehal J Wani, Niall Moran, Niranjan Hasabnis, Nishidha Panpaliya, npow, olicht, Pei Zhang, Peng Wang (Simpeng), Peng Yu, Philipp Jund, Pradeep Banavara, Pratik Kalshetti, qwertWZ, Rakesh Chada, Randy West, Ray Kim, Rholais Lii, Robin Richtsfeld, Rodrigo Silveira, Ruizhi, Santosh Kumar, Seb Bro, Sergei Lebedev, sfujiwara, Shaba Abhiram, Shashi, SneakyFish5, Soila Kavulya, Stefan Dyulgerov, Steven Winston, Sunitha Kambhampati, Surry Shome, Taehoon Lee, Thor Johnsen, Tristan Rice, TShapinsky, tucan, tucan9389, Vicente Reyes, Vilmar-Hillow, Vitaly Lavrukhin, wangershi, weidan.kong, weidankong, Wen-Heng (Jack) Chung, William D. Irons, Wim Glenn, XFeiF, Yan Facai (颜发才), Yanbo Liang, Yong Tang, Yoshihiro Yamazaki, Yuan (Terry) Tang, Yuan, Man, zhaoyongke, Áron Ricardo Perez-Lopez, 张天启, 张晓飞
+
+
+# Release 1.10.1
+## Bug Fixes and Other Changes
+
+* `tf.keras`:
+ * Fixing keras on Cloud TPUs. No new binaries will be built for Windows.
+
+
# Release 1.10.0
## Major Features And Improvements
@@ -11,7 +94,7 @@
## Breaking Changes
-* Prebuilt binaries are now (as of TensorFlow 1.10) built against NCCL 2.2 and no longer include NCCL in the binary install. TensorFlow usage with multiple GPUs and NCCL requires upgrade to [NCCL 2.2](https://developer.nvidia.com/nccl). See updated install guides: [Installing TensorFlow on Ubuntu](https://www.tensorflow.org/install/install_linux#tensorflow_gpu_support) and [Install TensorFlow from Sources](https://www.tensorflow.org/install/install_sources#optional_install_tensorflow_for_gpu_prerequisites).
+* Prebuilt binaries are now (as of TensorFlow 1.10) built against NCCL 2.2 and no longer include NCCL in the binary install. TensorFlow usage with multiple GPUs and NCCL requires upgrade to [NCCL 2.2](https://developer.nvidia.com/nccl). See updated install guides: [TensorFlow GPU support](https://www.tensorflow.org/install/gpu) and [Build TensorFlow from source](https://www.tensorflow.org/install/source).
* Starting from TensorFlow 1.11, Windows builds will use Bazel. Therefore, we will drop official support for cmake.
## Bug Fixes and Other Changes
diff --git a/configure.py b/configure.py
index 10fee6993eb52f71e2d0ad4d4c23eb3b53adc537..b564da27227ec07713f91e925ea292b35f0f02df 100644
--- a/configure.py
+++ b/configure.py
@@ -35,24 +35,30 @@ except ImportError:
_DEFAULT_CUDA_VERSION = '9.0'
_DEFAULT_CUDNN_VERSION = '7'
-_DEFAULT_NCCL_VERSION = '2.2'
_DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,7.0'
_DEFAULT_CUDA_PATH = '/usr/local/cuda'
_DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
_DEFAULT_CUDA_PATH_WIN = ('C:/Program Files/NVIDIA GPU Computing '
'Toolkit/CUDA/v%s' % _DEFAULT_CUDA_VERSION)
-_DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/%s-linux-gnu' % platform.machine()
_TF_OPENCL_VERSION = '1.2'
_DEFAULT_COMPUTECPP_TOOLKIT_PATH = '/usr/local/computecpp'
_DEFAULT_TRISYCL_INCLUDE_DIR = '/usr/local/triSYCL/include'
-_SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15]
+_SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15, 16]
_DEFAULT_PROMPT_ASK_ATTEMPTS = 10
-_TF_WORKSPACE_ROOT = os.path.abspath(os.path.dirname(__file__))
_TF_BAZELRC_FILENAME = '.tf_configure.bazelrc'
-_TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
-_TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE')
+_TF_WORKSPACE_ROOT = ''
+_TF_BAZELRC = ''
+
+NCCL_LIB_PATHS = [
+ 'lib64/', 'lib/powerpc64le-linux-gnu/', 'lib/x86_64-linux-gnu/', ''
+]
+
+if platform.machine() == 'ppc64le':
+ _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/powerpc64le-linux-gnu/'
+else:
+ _DEFAULT_TENSORRT_PATH_LINUX = '/usr/lib/%s-linux-gnu' % platform.machine()
class UserInputError(Exception):
@@ -153,14 +159,18 @@ def get_python_path(environ_cp, python_bin_path):
if environ_cp.get('PYTHONPATH'):
python_paths = environ_cp.get('PYTHONPATH').split(':')
try:
- library_paths = run_shell(
- [python_bin_path, '-c',
- 'import site; print("\\n".join(site.getsitepackages()))']).split('\n')
+ library_paths = run_shell([
+ python_bin_path, '-c',
+ 'import site; print("\\n".join(site.getsitepackages()))'
+ ]).split('\n')
except subprocess.CalledProcessError:
- library_paths = [run_shell(
- [python_bin_path, '-c',
- 'from distutils.sysconfig import get_python_lib;'
- 'print(get_python_lib())'])]
+ library_paths = [
+ run_shell([
+ python_bin_path, '-c',
+ 'from distutils.sysconfig import get_python_lib;'
+ 'print(get_python_lib())'
+ ])
+ ]
all_paths = set(python_paths + library_paths)
@@ -187,8 +197,7 @@ def setup_python(environ_cp):
environ_cp, 'PYTHON_BIN_PATH', ask_python_bin_path,
default_python_bin_path)
# Check if the path is valid
- if os.path.isfile(python_bin_path) and os.access(
- python_bin_path, os.X_OK):
+ if os.path.isfile(python_bin_path) and os.access(python_bin_path, os.X_OK):
break
elif not os.path.exists(python_bin_path):
print('Invalid python path: %s cannot be found.' % python_bin_path)
@@ -217,7 +226,7 @@ def setup_python(environ_cp):
python_lib_path = default_python_lib_path
environ_cp['PYTHON_LIB_PATH'] = python_lib_path
- python_major_version = get_python_major_version(python_bin_path)
+ _ = get_python_major_version(python_bin_path)
# Convert python path to Windows style before writing into bazel.rc
if is_windows() or is_cygwin():
@@ -230,15 +239,16 @@ def setup_python(environ_cp):
environ_cp['PYTHON_BIN_PATH'] = python_bin_path
# Write tools/python_bin_path.sh
- with open(os.path.join(
- _TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), 'w') as f:
+ with open(
+ os.path.join(_TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'),
+ 'w') as f:
f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path)
-def reset_tf_configure_bazelrc(workspace_path):
+def reset_tf_configure_bazelrc():
"""Reset file that contains customized config settings."""
open(_TF_BAZELRC, 'w').close()
- bazelrc_path = os.path.join(workspace_path, '.bazelrc')
+ bazelrc_path = os.path.join(_TF_WORKSPACE_ROOT, '.bazelrc')
data = []
if os.path.exists(bazelrc_path):
@@ -249,20 +259,15 @@ def reset_tf_configure_bazelrc(workspace_path):
if _TF_BAZELRC_FILENAME in l:
continue
f.write('%s\n' % l)
- if is_windows():
- tf_bazelrc_path = _TF_BAZELRC.replace("\\", "/")
- else:
- tf_bazelrc_path = _TF_BAZELRC
- f.write('import %s\n' % tf_bazelrc_path)
-
+ f.write('import %%workspace%%/%s\n' % _TF_BAZELRC_FILENAME)
def cleanup_makefile():
"""Delete any leftover BUILD files from the Makefile build.
These files could interfere with Bazel parsing.
"""
- makefile_download_dir = os.path.join(
- _TF_WORKSPACE_ROOT, 'tensorflow', 'contrib', 'makefile', 'downloads')
+ makefile_download_dir = os.path.join(_TF_WORKSPACE_ROOT, 'tensorflow',
+ 'contrib', 'makefile', 'downloads')
if os.path.isdir(makefile_download_dir):
for root, _, filenames in os.walk(makefile_download_dir):
for f in filenames:
@@ -330,9 +335,8 @@ def get_var(environ_cp,
'Environment variable %s must be set as a boolean indicator.\n'
'The following are accepted as TRUE : %s.\n'
'The following are accepted as FALSE: %s.\n'
- 'Current value is %s.' % (
- var_name, ', '.join(true_strings), ', '.join(false_strings),
- var))
+ 'Current value is %s.' % (var_name, ', '.join(true_strings),
+ ', '.join(false_strings), var))
while var is None:
user_input_origin = get_input(question)
@@ -355,8 +359,12 @@ def get_var(environ_cp,
return var
-def set_build_var(environ_cp, var_name, query_item, option_name,
- enabled_by_default, bazel_config_name=None):
+def set_build_var(environ_cp,
+ var_name,
+ query_item,
+ option_name,
+ enabled_by_default,
+ bazel_config_name=None):
"""Set if query_item will be enabled for the build.
Ask user if query_item will be enabled. Default is used if no input is given.
@@ -375,12 +383,14 @@ def set_build_var(environ_cp, var_name, query_item, option_name,
var = str(int(get_var(environ_cp, var_name, query_item, enabled_by_default)))
environ_cp[var_name] = var
if var == '1':
- write_to_bazelrc('build --define %s=true' % option_name)
+ write_to_bazelrc(
+ 'build:%s --define %s=true' % (bazel_config_name, option_name))
+ write_to_bazelrc('build --config=%s' % bazel_config_name)
elif bazel_config_name is not None:
# TODO(mikecase): Migrate all users of configure.py to use --config Bazel
# options and not to set build configs through environment variables.
- write_to_bazelrc('build:%s --define %s=true'
- % (bazel_config_name, option_name))
+ write_to_bazelrc(
+ 'build:%s --define %s=true' % (bazel_config_name, option_name))
def set_action_env_var(environ_cp,
@@ -447,7 +457,8 @@ def check_bazel_version(min_version):
if which('bazel') is None:
print('Cannot find bazel. Please install bazel.')
sys.exit(0)
- curr_version = run_shell(['bazel', '--batch', '--bazelrc=/dev/null', 'version'])
+ curr_version = run_shell(
+ ['bazel', '--batch', '--bazelrc=/dev/null', 'version'])
for line in curr_version.split('\n'):
if 'Build label: ' in line:
@@ -486,7 +497,7 @@ def set_cc_opt_flags(environ_cp):
elif is_windows():
default_cc_opt_flags = '/arch:AVX'
else:
- default_cc_opt_flags = '-march=native'
+ default_cc_opt_flags = '-march=native -Wno-sign-compare'
question = ('Please specify optimization flags to use during compilation when'
' bazel option "--config=opt" is specified [Default is %s]: '
) % default_cc_opt_flags
@@ -499,6 +510,7 @@ def set_cc_opt_flags(environ_cp):
write_to_bazelrc('build:opt --host_copt=-march=native')
write_to_bazelrc('build:opt --define with_default_optimizations=true')
+
def set_tf_cuda_clang(environ_cp):
"""set TF_CUDA_CLANG action_env.
@@ -581,16 +593,14 @@ def set_clang_cuda_compiler_path(environ_cp):
clang_cuda_compiler_path)
-def prompt_loop_or_load_from_env(
- environ_cp,
- var_name,
- var_default,
- ask_for_var,
- check_success,
- error_msg,
- suppress_default_error=False,
- n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS
-):
+def prompt_loop_or_load_from_env(environ_cp,
+ var_name,
+ var_default,
+ ask_for_var,
+ check_success,
+ error_msg,
+ suppress_default_error=False,
+ n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS):
"""Loop over user prompts for an ENV param until receiving a valid response.
For the env param var_name, read from the environment or verify user input
@@ -629,9 +639,7 @@ def prompt_loop_or_load_from_env(
)
for _ in range(n_ask_attempts):
- val = get_from_env_or_user_or_default(environ_cp,
- var_name,
- full_query,
+ val = get_from_env_or_user_or_default(environ_cp, var_name, full_query,
default)
if check_success(val):
break
@@ -639,9 +647,9 @@ def prompt_loop_or_load_from_env(
print(error_msg % val)
environ_cp[var_name] = ''
else:
- raise UserInputError('Invalid %s setting was provided %d times in a row. '
- 'Assuming to be a scripting mistake.' %
- (var_name, n_ask_attempts))
+ raise UserInputError(
+ 'Invalid %s setting was provided %d times in a row. '
+ 'Assuming to be a scripting mistake.' % (var_name, n_ask_attempts))
environ_cp[var_name] = val
return val
@@ -650,8 +658,8 @@ def prompt_loop_or_load_from_env(
def create_android_ndk_rule(environ_cp):
"""Set ANDROID_NDK_HOME and write Android NDK WORKSPACE rule."""
if is_windows() or is_cygwin():
- default_ndk_path = cygpath('%s/Android/Sdk/ndk-bundle' %
- environ_cp['APPDATA'])
+ default_ndk_path = cygpath(
+ '%s/Android/Sdk/ndk-bundle' % environ_cp['APPDATA'])
elif is_macos():
default_ndk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME']
else:
@@ -668,8 +676,7 @@ def create_android_ndk_rule(environ_cp):
ask_for_var='Please specify the home path of the Android NDK to use.',
check_success=valid_ndk_path,
error_msg=('The path %s or its child file "source.properties" '
- 'does not exist.')
- )
+ 'does not exist.'))
write_action_env_to_bazelrc('ANDROID_NDK_HOME', android_ndk_home_path)
write_action_env_to_bazelrc('ANDROID_NDK_API_LEVEL',
check_ndk_level(android_ndk_home_path))
@@ -703,9 +710,9 @@ def create_android_sdk_rule(environ_cp):
api_levels = [x.replace('android-', '') for x in api_levels]
def valid_api_level(api_level):
- return os.path.exists(os.path.join(android_sdk_home_path,
- 'platforms',
- 'android-' + api_level))
+ return os.path.exists(
+ os.path.join(android_sdk_home_path, 'platforms',
+ 'android-' + api_level))
android_api_level = prompt_loop_or_load_from_env(
environ_cp,
@@ -720,9 +727,8 @@ def create_android_sdk_rule(environ_cp):
versions = sorted(os.listdir(build_tools))
def valid_build_tools(version):
- return os.path.exists(os.path.join(android_sdk_home_path,
- 'build-tools',
- version))
+ return os.path.exists(
+ os.path.join(android_sdk_home_path, 'build-tools', version))
android_build_tools_version = prompt_loop_or_load_from_env(
environ_cp,
@@ -736,10 +742,8 @@ def create_android_sdk_rule(environ_cp):
write_action_env_to_bazelrc('ANDROID_BUILD_TOOLS_VERSION',
android_build_tools_version)
- write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL',
- android_api_level)
- write_action_env_to_bazelrc('ANDROID_SDK_HOME',
- android_sdk_home_path)
+ write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL', android_api_level)
+ write_action_env_to_bazelrc('ANDROID_SDK_HOME', android_sdk_home_path)
def check_ndk_level(android_ndk_home_path):
@@ -798,6 +802,7 @@ def reformat_version_sequence(version_str, sequence_count):
Args:
version_str: String, the version string.
sequence_count: int, an integer.
+
Returns:
string, reformatted version string.
"""
@@ -841,18 +846,25 @@ def set_tf_cuda_version(environ_cp):
if is_windows():
cuda_rt_lib_paths = ['lib/x64/cudart.lib']
elif is_linux():
- cuda_rt_lib_paths = ['%s/libcudart.so.%s' % (x, tf_cuda_version)
- for x in ['lib64', 'lib/x86_64-linux-gnu']]
+ cuda_rt_lib_paths = [
+ '%s/libcudart.so.%s' % (x, tf_cuda_version) for x in [
+ 'lib64',
+ 'lib/powerpc64le-linux-gnu',
+ 'lib/x86_64-linux-gnu',
+ ]
+ ]
elif is_macos():
cuda_rt_lib_paths = ['lib/libcudart.%s.dylib' % tf_cuda_version]
- cuda_toolkit_paths_full = [os.path.join(cuda_toolkit_path, x) for x in cuda_rt_lib_paths]
+ cuda_toolkit_paths_full = [
+ os.path.join(cuda_toolkit_path, x) for x in cuda_rt_lib_paths
+ ]
if any([os.path.exists(x) for x in cuda_toolkit_paths_full]):
break
# Reset and retry
print('Invalid path to CUDA %s toolkit. %s cannot be found' %
- (tf_cuda_version, cuda_toolkit_path_full))
+ (tf_cuda_version, cuda_toolkit_paths_full))
environ_cp['TF_CUDA_VERSION'] = ''
environ_cp['CUDA_TOOLKIT_PATH'] = ''
@@ -872,7 +884,7 @@ def set_tf_cudnn_version(environ_cp):
"""Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION."""
ask_cudnn_version = (
'Please specify the cuDNN version you want to use. '
- '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION
+ '[Leave empty to default to cuDNN %s]: ') % _DEFAULT_CUDNN_VERSION
for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
tf_cudnn_version = get_from_env_or_user_or_default(
@@ -919,8 +931,8 @@ def set_tf_cudnn_version(environ_cp):
cudnn_path_from_ldconfig)
if cudnn_path_from_ldconfig:
cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1)
- if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig,
- tf_cudnn_version)):
+ if os.path.exists(
+ '%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)):
cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig)
break
@@ -1029,7 +1041,7 @@ def set_tf_tensorrt_install_path(environ_cp):
for lib_file in possible_files:
if is_cuda_compatible(lib_file, cuda_ver, cudnn_ver):
matches = nvinfer_pattern.search(lib_file)
- if len(matches.groups()) == 0:
+ if not matches.groups():
continue
ver_str = matches.group(1)
ver = convert_version_to_int(ver_str) if len(ver_str) else 0
@@ -1085,7 +1097,7 @@ def set_tf_tensorrt_install_path(environ_cp):
def set_tf_nccl_install_path(environ_cp):
- """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION.
+ """Set NCCL_INSTALL_PATH, NCCL_HDR_PATH and TF_NCCL_VERSION.
Args:
environ_cp: copy of the os.environ.
@@ -1098,59 +1110,119 @@ def set_tf_nccl_install_path(environ_cp):
raise ValueError('Currently NCCL is only supported on Linux platforms.')
ask_nccl_version = (
- 'Please specify the NCCL version you want to use. If NCCL %s is not '
- 'installed, then you can use version 1.3 that can be fetched '
- 'automatically but it may have worse performance with multiple GPUs. '
- '[Default is %s]: ') % (_DEFAULT_NCCL_VERSION, _DEFAULT_NCCL_VERSION)
+ 'Please specify the locally installed NCCL version you want to use. '
+ '[Default is to use https://github.com/nvidia/nccl]: ')
for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
tf_nccl_version = get_from_env_or_user_or_default(
- environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, _DEFAULT_NCCL_VERSION)
- tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1)
+ environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, '')
+
+ if not tf_nccl_version:
+ break # No need to get install path, building the open source code.
- if tf_nccl_version == '1':
- break # No need to get install path, NCCL 1 is a GitHub repo.
+ tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1)
- # TODO(csigg): Look with ldconfig first if we can find the library in paths
+ # Look with ldconfig first if we can find the library in paths
# like /usr/lib/x86_64-linux-gnu and the header file in the corresponding
# include directory. This is where the NCCL .deb packages install them.
- # Then ask the user if we should use that. Instead of a single
- # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to
- # nccl_configure.bzl
- default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH')
- ask_nccl_path = (r'Please specify the location where NCCL %s library is '
- 'installed. Refer to README.md for more details. [Default '
- 'is %s]:') % (tf_nccl_version, default_nccl_path)
- nccl_install_path = get_from_env_or_user_or_default(
- environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path)
- # Result returned from "read" will be used unexpanded. That make "~"
- # unusable. Going through one more level of expansion to handle that.
- nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path))
- if is_windows() or is_cygwin():
- nccl_install_path = cygpath(nccl_install_path)
-
- if is_windows():
- nccl_lib_path = 'lib/x64/nccl.lib'
- elif is_linux():
- nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version
- elif is_macos():
- nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version
-
- nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path)
- nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h')
- if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path):
- # Set NCCL_INSTALL_PATH
- environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path
- write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path)
- break
-
- # Reset and Retry
- print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the '
+ # First check to see if NCCL is in the ldconfig.
+ # If it is found, use that location.
+ if is_linux():
+ ldconfig_bin = which('ldconfig') or '/sbin/ldconfig'
+ nccl2_path_from_ldconfig = run_shell([ldconfig_bin, '-p'])
+ nccl2_path_from_ldconfig = re.search('.*libnccl.so .* => (.*)',
+ nccl2_path_from_ldconfig)
+ if nccl2_path_from_ldconfig:
+ nccl2_path_from_ldconfig = nccl2_path_from_ldconfig.group(1)
+ if os.path.exists('%s.%s' % (nccl2_path_from_ldconfig, tf_nccl_version)):
+ nccl_install_path = os.path.dirname(nccl2_path_from_ldconfig)
+ print('NCCL libraries found in ' + nccl2_path_from_ldconfig)
+
+ # Check if this is the main system lib location
+ if re.search('.*linux-gnu', nccl_install_path):
+ trunc_nccl_install_path = '/usr'
+ print('This looks like a system path.')
+ else:
+ trunc_nccl_install_path = nccl_install_path + '/..'
+
+ # Look for header
+ nccl_hdr_path = trunc_nccl_install_path + '/include'
+ print('Assuming NCCL header path is ' + nccl_hdr_path)
+ if os.path.exists(nccl_hdr_path + '/nccl.h'):
+ # Set NCCL_INSTALL_PATH
+ environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path
+ write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path)
+
+ # Set NCCL_HDR_PATH
+ environ_cp['NCCL_HDR_PATH'] = nccl_hdr_path
+ write_action_env_to_bazelrc('NCCL_HDR_PATH', nccl_hdr_path)
+ break
+ else:
+ print(
+ 'The header for NCCL2 cannot be found. Please install the libnccl-dev package.'
+ )
+ else:
+ print('NCCL2 is listed by ldconfig but the library is not found. '
+ 'Your ldconfig is out of date. Please run sudo ldconfig.')
+ else:
+ # NCCL is not found in ldconfig. Ask the user for the location.
+ default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH')
+ ask_nccl_path = (
+ r'Please specify the location where NCCL %s library is '
+ 'installed. Refer to README.md for more details. [Default '
+ 'is %s]:') % (tf_nccl_version, default_nccl_path)
+ nccl_install_path = get_from_env_or_user_or_default(
+ environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path)
+
+ # Result returned from "read" will be used unexpanded. That makes "~"
+ # unusable. Going through one more level of expansion to handle that.
+ nccl_install_path = os.path.realpath(
+ os.path.expanduser(nccl_install_path))
+ if is_windows() or is_cygwin():
+ nccl_install_path = cygpath(nccl_install_path)
+
+ if is_windows():
+ nccl_lib_path = 'lib/x64/nccl.lib'
+ elif is_linux():
+ nccl_lib_filename = 'libnccl.so.%s' % tf_nccl_version
+ nccl_lpath = '%s/lib/%s' % (nccl_install_path, nccl_lib_filename)
+ if not os.path.exists(nccl_lpath):
+ for relative_path in NCCL_LIB_PATHS:
+ path = '%s/%s%s' % (nccl_install_path, relative_path,
+ nccl_lib_filename)
+ if os.path.exists(path):
+ print('NCCL found at ' + path)
+ nccl_lib_path = path
+ break
+ else:
+ nccl_lib_path = nccl_lpath
+ elif is_macos():
+ nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version
+
+ nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path)
+ nccl_hdr_path = os.path.join(
+ os.path.dirname(nccl_lib_path), '../include/nccl.h')
+ print('Assuming NCCL header path is ' + nccl_hdr_path)
+ if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path):
+ # Set NCCL_INSTALL_PATH
+ environ_cp['NCCL_INSTALL_PATH'] = os.path.dirname(nccl_lib_path)
+ write_action_env_to_bazelrc('NCCL_INSTALL_PATH',
+ os.path.dirname(nccl_lib_path))
+
+ # Set NCCL_HDR_PATH
+ environ_cp['NCCL_HDR_PATH'] = os.path.dirname(nccl_hdr_path)
+ write_action_env_to_bazelrc('NCCL_HDR_PATH',
+ os.path.dirname(nccl_hdr_path))
+ break
+
+ # Reset and Retry
+ print(
+ 'Invalid path to NCCL %s toolkit, %s or %s not found. Please use the '
'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path,
nccl_hdr_path))
- environ_cp['TF_NCCL_VERSION'] = ''
+ environ_cp['TF_NCCL_VERSION'] = ''
else:
raise UserInputError('Invalid TF_NCCL setting was provided %d '
'times in a row. Assuming to be a scripting mistake.' %
@@ -1160,12 +1232,12 @@ def set_tf_nccl_install_path(environ_cp):
environ_cp['TF_NCCL_VERSION'] = tf_nccl_version
write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version)
-
def get_native_cuda_compute_capabilities(environ_cp):
"""Get native cuda compute capabilities.
Args:
environ_cp: copy of the os.environ.
+
Returns:
string of native cuda compute capabilities, separated by comma.
"""
@@ -1290,8 +1362,7 @@ def set_computecpp_toolkit_path(environ_cp):
else:
sycl_rt_lib_path = ''
- sycl_rt_lib_path_full = os.path.join(toolkit_path,
- sycl_rt_lib_path)
+ sycl_rt_lib_path_full = os.path.join(toolkit_path, sycl_rt_lib_path)
exists = os.path.exists(sycl_rt_lib_path_full)
if not exists:
print('Invalid SYCL %s library path. %s cannot be found' %
@@ -1319,8 +1390,8 @@ def set_trisycl_include_dir(environ_cp):
ask_trisycl_include_dir = ('Please specify the location of the triSYCL '
'include directory. (Use --config=sycl_trisycl '
'when building with Bazel) '
- '[Default is %s]: '
- ) % (_DEFAULT_TRISYCL_INCLUDE_DIR)
+ '[Default is %s]: ') % (
+ _DEFAULT_TRISYCL_INCLUDE_DIR)
while True:
trisycl_include_dir = get_from_env_or_user_or_default(
@@ -1329,13 +1400,12 @@ def set_trisycl_include_dir(environ_cp):
if os.path.exists(trisycl_include_dir):
break
- print('Invalid triSYCL include directory, %s cannot be found'
- % (trisycl_include_dir))
+ print('Invalid triSYCL include directory, %s cannot be found' %
+ (trisycl_include_dir))
# Set TRISYCL_INCLUDE_DIR
environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir
- write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR',
- trisycl_include_dir)
+ write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
def set_mpi_home(environ_cp):
@@ -1345,8 +1415,9 @@ def set_mpi_home(environ_cp):
default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home))
def valid_mpi_path(mpi_home):
- exists = (os.path.exists(os.path.join(mpi_home, 'include')) and
- os.path.exists(os.path.join(mpi_home, 'lib')))
+ exists = (
+ os.path.exists(os.path.join(mpi_home, 'include')) and
+ os.path.exists(os.path.join(mpi_home, 'lib')))
if not exists:
print('Invalid path to the MPI Toolkit. %s or %s cannot be found' %
(os.path.join(mpi_home, 'include'),
@@ -1395,16 +1466,22 @@ def set_other_mpi_vars(environ_cp):
raise ValueError('Cannot find the MPI library file in %s/lib' % mpi_home)
-def set_grpc_build_flags():
- write_to_bazelrc('build --define grpc_no_ares=true')
-
-
def set_system_libs_flag(environ_cp):
syslibs = environ_cp.get('TF_SYSTEM_LIBS', '')
- syslibs = ','.join(sorted(syslibs.split(',')))
- if syslibs and syslibs != '':
+ if syslibs:
+ if ',' in syslibs:
+ syslibs = ','.join(sorted(syslibs.split(',')))
+ else:
+ syslibs = ','.join(sorted(syslibs.split()))
write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs)
+ if 'PREFIX' in environ_cp:
+ write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX'])
+ if 'LIBDIR' in environ_cp:
+ write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR'])
+ if 'INCLUDEDIR' in environ_cp:
+ write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR'])
+
def set_windows_build_flags(environ_cp):
"""Set Windows specific build options."""
@@ -1424,11 +1501,9 @@ def set_windows_build_flags(environ_cp):
if get_var(
environ_cp, 'TF_OVERRIDE_EIGEN_STRONG_INLINE', 'Eigen strong inline',
- True,
- ('Would you like to override eigen strong inline for some C++ '
- 'compilation to reduce the compilation time?'),
- 'Eigen strong inline overridden.',
- 'Not overriding eigen strong inline, '
+ True, ('Would you like to override eigen strong inline for some C++ '
+ 'compilation to reduce the compilation time?'),
+ 'Eigen strong inline overridden.', 'Not overriding eigen strong inline, '
'some compilations could take more than 20 mins.'):
# Due to a known MSVC compiler issue
# https://github.com/tensorflow/tensorflow/issues/10521
@@ -1444,29 +1519,31 @@ def config_info_line(name, help_text):
def main():
+ global _TF_WORKSPACE_ROOT
+ global _TF_BAZELRC
+
parser = argparse.ArgumentParser()
- parser.add_argument("--workspace",
- type=str,
- default=_TF_WORKSPACE_ROOT,
- help="The absolute path to your active Bazel workspace.")
+ parser.add_argument(
+ '--workspace',
+ type=str,
+ default=os.path.abspath(os.path.dirname(__file__)),
+ help='The absolute path to your active Bazel workspace.')
args = parser.parse_args()
+ _TF_WORKSPACE_ROOT = args.workspace
+ _TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME)
+
# Make a copy of os.environ to be clear when functions and getting and setting
# environment variables.
environ_cp = dict(os.environ)
check_bazel_version('0.15.0')
- reset_tf_configure_bazelrc(args.workspace)
+ reset_tf_configure_bazelrc()
cleanup_makefile()
setup_python(environ_cp)
if is_windows():
- environ_cp['TF_NEED_AWS'] = '0'
- environ_cp['TF_NEED_GCP'] = '0'
- environ_cp['TF_NEED_HDFS'] = '0'
- environ_cp['TF_NEED_JEMALLOC'] = '0'
- environ_cp['TF_NEED_KAFKA'] = '0'
environ_cp['TF_NEED_OPENCL_SYCL'] = '0'
environ_cp['TF_NEED_COMPUTECPP'] = '0'
environ_cp['TF_NEED_OPENCL'] = '0'
@@ -1475,14 +1552,10 @@ def main():
# TODO(ibiryukov): Investigate using clang as a cpu or cuda compiler on
# Windows.
environ_cp['TF_DOWNLOAD_CLANG'] = '0'
- environ_cp['TF_ENABLE_XLA'] = '0'
- environ_cp['TF_NEED_GDR'] = '0'
- environ_cp['TF_NEED_VERBS'] = '0'
environ_cp['TF_NEED_MPI'] = '0'
environ_cp['TF_SET_ANDROID_WORKSPACE'] = '0'
if is_macos():
- environ_cp['TF_NEED_JEMALLOC'] = '0'
environ_cp['TF_NEED_TENSORRT'] = '0'
# The numpy package on ppc64le uses OpenBLAS which has multi-threading
@@ -1490,26 +1563,11 @@ def main():
# runtime to allow the Tensorflow testcases which compare numpy
# results to Tensorflow results to succeed.
if is_ppc64le():
- write_action_env_to_bazelrc("OMP_NUM_THREADS", 1)
-
- set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc',
- 'with_jemalloc', True)
- set_build_var(environ_cp, 'TF_NEED_GCP', 'Google Cloud Platform',
- 'with_gcp_support', True, 'gcp')
- set_build_var(environ_cp, 'TF_NEED_HDFS', 'Hadoop File System',
- 'with_hdfs_support', True, 'hdfs')
- set_build_var(environ_cp, 'TF_NEED_AWS', 'Amazon AWS Platform',
- 'with_aws_support', True, 'aws')
- set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform',
- 'with_kafka_support', True, 'kafka')
+ write_action_env_to_bazelrc('OMP_NUM_THREADS', 1)
+
+ xla_enabled_by_default = is_linux()
set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support',
- False, 'xla')
- set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support',
- False, 'gdr')
- set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support',
- False, 'verbs')
- set_build_var(environ_cp, 'TF_NEED_NGRAPH', 'nGraph',
- 'with_ngraph_support', False, 'ngraph')
+ xla_enabled_by_default, 'xla')
set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False)
if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
@@ -1521,6 +1579,13 @@ def main():
else:
set_trisycl_include_dir(environ_cp)
+ set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False)
+ if (environ_cp.get('TF_NEED_ROCM') == '1' and
+ 'LD_LIBRARY_PATH' in environ_cp and
+ environ_cp.get('LD_LIBRARY_PATH') != '1'):
+ write_action_env_to_bazelrc('LD_LIBRARY_PATH',
+ environ_cp.get('LD_LIBRARY_PATH'))
+
set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
if (environ_cp.get('TF_NEED_CUDA') == '1' and
'TF_CUDA_CONFIG_REPO' not in environ_cp):
@@ -1543,6 +1608,10 @@ def main():
if environ_cp.get('TF_DOWNLOAD_CLANG') != '1':
# Set up which clang we should use as the cuda / host compiler.
set_clang_cuda_compiler_path(environ_cp)
+ else:
+ # Use downloaded LLD for linking.
+ write_to_bazelrc('build:cuda_clang --config=download_clang_use_lld')
+ write_to_bazelrc('test:cuda_clang --config=download_clang_use_lld')
else:
# Set up which gcc nvcc should use as the host compiler
# No need to set this on Windows
@@ -1557,36 +1626,56 @@ def main():
write_to_bazelrc('build --config=download_clang')
write_to_bazelrc('test --config=download_clang')
+ # SYCL / ROCm / CUDA are mutually exclusive.
+ # At most 1 GPU platform can be configured.
+ gpu_platform_count = 0
+ if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1':
+ gpu_platform_count += 1
+ if environ_cp.get('TF_NEED_ROCM') == '1':
+ gpu_platform_count += 1
+ if environ_cp.get('TF_NEED_CUDA') == '1':
+ gpu_platform_count += 1
+ if gpu_platform_count >= 2:
+ raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. '
+ 'At most 1 GPU platform can be configured.')
+
set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False)
if environ_cp.get('TF_NEED_MPI') == '1':
set_mpi_home(environ_cp)
set_other_mpi_vars(environ_cp)
- set_grpc_build_flags()
set_cc_opt_flags(environ_cp)
set_system_libs_flag(environ_cp)
if is_windows():
set_windows_build_flags(environ_cp)
- if get_var(
- environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace',
- False,
- ('Would you like to interactively configure ./WORKSPACE for '
- 'Android builds?'),
- 'Searching for NDK and SDK installations.',
- 'Not configuring the WORKSPACE for Android builds.'):
+ # Add a config option to build TensorFlow 2.0 API.
+ write_to_bazelrc('build:v2 --define=tf_api_version=2')
+
+ if get_var(environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace', False,
+ ('Would you like to interactively configure ./WORKSPACE for '
+ 'Android builds?'), 'Searching for NDK and SDK installations.',
+ 'Not configuring the WORKSPACE for Android builds.'):
create_android_ndk_rule(environ_cp)
create_android_sdk_rule(environ_cp)
- # On Windows, we don't have MKL support and the build is always monolithic.
- # So no need to print the following message.
- # TODO(pcloudy): remove the following if check when they make sense on Windows
- if not is_windows():
- print('Preconfigured Bazel build configs. You can use any of the below by '
- 'adding "--config=<>" to your build command. See tools/bazel.rc for '
- 'more details.')
- config_info_line('mkl', 'Build with MKL support.')
- config_info_line('monolithic', 'Config for mostly static monolithic build.')
+ print('Preconfigured Bazel build configs. You can use any of the below by '
+ 'adding "--config=<>" to your build command. See .bazelrc for more '
+ 'details.')
+ config_info_line('mkl', 'Build with MKL support.')
+ config_info_line('monolithic', 'Config for mostly static monolithic build.')
+ config_info_line('gdr', 'Build with GDR support.')
+ config_info_line('verbs', 'Build with libverbs support.')
+ config_info_line('ngraph', 'Build with Intel nGraph support.')
+
+ print('Preconfigured Bazel build configs to DISABLE default on features:')
+ config_info_line('noaws', 'Disable AWS S3 filesystem support.')
+ config_info_line('nogcp', 'Disable GCP support.')
+ config_info_line('nohdfs', 'Disable HDFS support.')
+ config_info_line('noignite', 'Disable Apacha Ignite support.')
+ config_info_line('nokafka', 'Disable Apache Kafka support.')
+
if __name__ == '__main__':
main()
+
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 9cc4c4567b4b2ea6bc29919bfa03c190c9005fbc..77e3baaff198b402dc04daa1b11e4007b9906b23 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -12,6 +12,7 @@ exports_files([
# The leakr files are used by //third_party/cloud_tpu.
"leakr_badwords.dic",
"leakr_badfiles.dic",
+ "leakr_file_type_recipe.ftrcp",
])
load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object")
@@ -23,11 +24,25 @@ load(
"//tensorflow/python/tools/api/generator:api_gen.bzl",
"gen_api_init_files", # @unused
)
+load("//tensorflow/python/tools/api/generator:api_gen.bzl", "get_compat_files")
+load(
+ "//tensorflow/python/tools/api/generator:api_init_files.bzl",
+ "TENSORFLOW_API_INIT_FILES", # @unused
+)
+load(
+ "//tensorflow/python/tools/api/generator:api_init_files_v1.bzl",
+ "TENSORFLOW_API_INIT_FILES_V1", # @unused
+)
load(
"//third_party/ngraph:build_defs.bzl",
"if_ngraph",
)
+# @unused
+TENSORFLOW_API_INIT_FILES_V2 = (
+ TENSORFLOW_API_INIT_FILES + get_compat_files(TENSORFLOW_API_INIT_FILES_V1, 1)
+)
+
# Config setting used when building for products
# which requires restricted licenses to be avoided.
config_setting(
@@ -188,81 +203,46 @@ config_setting(
visibility = ["//visibility:public"],
)
-# TODO(jhseu): Enable on other platforms other than Linux.
-config_setting(
- name = "with_jemalloc_linux_x86_64",
- define_values = {"with_jemalloc": "true"},
- values = {"cpu": "k8"},
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "with_jemalloc_linux_ppc64le",
- define_values = {"with_jemalloc": "true"},
- values = {"cpu": "ppc"},
- visibility = ["//visibility:public"],
-)
-
config_setting(
name = "with_default_optimizations",
define_values = {"with_default_optimizations": "true"},
visibility = ["//visibility:public"],
)
+# Features that are default ON are handled differently below.
+#
config_setting(
- name = "with_gcp_support",
- define_values = {"with_gcp_support": "true"},
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "with_hdfs_support",
- define_values = {"with_hdfs_support": "true"},
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "with_aws_support",
- define_values = {"with_aws_support": "true"},
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "with_kafka_support",
- define_values = {"with_kafka_support": "true"},
+ name = "no_aws_support",
+ define_values = {"no_aws_support": "false"},
visibility = ["//visibility:public"],
)
-# Crosses between platforms and file system libraries not supported on those
-# platforms due to limitations in nested select() statements.
config_setting(
- name = "with_gcp_support_windows_override",
- define_values = {"with_gcp_support": "true"},
- values = {"cpu": "x64_windows"},
+ name = "no_gcp_support",
+ define_values = {"no_gcp_support": "false"},
visibility = ["//visibility:public"],
)
config_setting(
- name = "with_hdfs_support_windows_override",
- define_values = {"with_hdfs_support": "true"},
- values = {"cpu": "x64_windows"},
+ name = "no_hdfs_support",
+ define_values = {"no_hdfs_support": "false"},
visibility = ["//visibility:public"],
)
config_setting(
- name = "with_aws_support_windows_override",
- define_values = {"with_aws_support": "true"},
- values = {"cpu": "x64_windows"},
+ name = "no_ignite_support",
+ define_values = {"no_ignite_support": "false"},
visibility = ["//visibility:public"],
)
config_setting(
- name = "with_kafka_support_windows_override",
- define_values = {"with_kafka_support": "true"},
- values = {"cpu": "x64_windows"},
+ name = "no_kafka_support",
+ define_values = {"no_kafka_support": "false"},
visibility = ["//visibility:public"],
)
+# Crosses between platforms and file system libraries not supported on those
+# platforms due to limitations in nested select() statements.
config_setting(
name = "with_cuda_support_windows_override",
define_values = {"using_cuda_nvcc": "true"},
@@ -270,48 +250,6 @@ config_setting(
visibility = ["//visibility:public"],
)
-config_setting(
- name = "with_gcp_support_android_override",
- define_values = {"with_gcp_support": "true"},
- values = {"crosstool_top": "//external:android/crosstool"},
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "with_hdfs_support_android_override",
- define_values = {"with_hdfs_support": "true"},
- values = {"crosstool_top": "//external:android/crosstool"},
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "with_aws_support_android_override",
- define_values = {"with_aws_support": "true"},
- values = {"crosstool_top": "//external:android/crosstool"},
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "with_gcp_support_ios_override",
- define_values = {"with_gcp_support": "true"},
- values = {"crosstool_top": "//tools/osx/crosstool:crosstool"},
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "with_hdfs_support_ios_override",
- define_values = {"with_hdfs_support": "true"},
- values = {"crosstool_top": "//tools/osx/crosstool:crosstool"},
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "with_aws_support_ios_override",
- define_values = {"with_aws_support": "true"},
- values = {"crosstool_top": "//tools/osx/crosstool:crosstool"},
- visibility = ["//visibility:public"],
-)
-
config_setting(
name = "with_xla_support",
define_values = {"with_xla_support": "true"},
@@ -340,30 +278,6 @@ config_setting(
visibility = ["//visibility:public"],
)
-config_setting(
- name = "with_jemalloc_linux_x86_64_dynamic",
- define_values = {
- "with_jemalloc": "true",
- "framework_shared_object": "true",
- },
- values = {
- "cpu": "k8",
- },
- visibility = ["//visibility:public"],
-)
-
-config_setting(
- name = "with_jemalloc_linux_ppc64le_dynamic",
- define_values = {
- "with_jemalloc": "true",
- "framework_shared_object": "true",
- },
- values = {
- "cpu": "ppc",
- },
- visibility = ["//visibility:public"],
-)
-
config_setting(
name = "using_cuda_clang",
define_values = {
@@ -423,12 +337,20 @@ config_setting(
visibility = ["//visibility:public"],
)
+# This flag specifies whether TensorFlow 2.0 API should be built instead
+# of 1.* API. Note that TensorFlow 2.0 API is currently under development.
+config_setting(
+ name = "api_version_2",
+ define_values = {"tf_api_version": "2"},
+)
+
package_group(
name = "internal",
packages = [
"-//third_party/tensorflow/python/estimator",
"//learning/meta_rank/...",
"//tensorflow/...",
+ "//tensorflow_estimator/...",
"//tensorflow_fold/llgtm/...",
"//third_party/py/tensor2tensor/...",
],
@@ -541,6 +463,7 @@ tf_cc_shared_object(
"$(location //tensorflow/c:version_script.lds)",
],
}),
+ visibility = ["//visibility:public"],
deps = [
"//tensorflow/c:c_api",
"//tensorflow/c:c_api_experimental",
@@ -565,6 +488,7 @@ tf_cc_shared_object(
"$(location //tensorflow:tf_version_script.lds)",
],
}),
+ visibility = ["//visibility:public"],
deps = [
"//tensorflow:tf_exported_symbols.lds",
"//tensorflow:tf_version_script.lds",
@@ -585,10 +509,73 @@ exports_files(
],
)
+genrule(
+ name = "install_headers",
+ srcs = [
+ "//tensorflow/c:headers",
+ "//tensorflow/c/eager:headers",
+ "//tensorflow/cc:headers",
+ "//tensorflow/core:headers",
+ ],
+ outs = ["include"],
+ cmd = """
+ mkdir $@
+ for f in $(SRCS); do
+ d="$${f%/*}"
+ d="$${d#bazel-out*genfiles/}"
+ d="$${d#*external/eigen_archive/}"
+
+ if [[ $${d} == *local_config_* ]]; then
+ continue
+ fi
+
+ if [[ $${d} == external* ]]; then
+ extname="$${d#*external/}"
+ extname="$${extname%%/*}"
+ if [[ $${TF_SYSTEM_LIBS:-} == *$${extname}* ]]; then
+ continue
+ fi
+ fi
+
+ mkdir -p "$@/$${d}"
+ cp "$${f}" "$@/$${d}/"
+ done
+ """,
+ tags = ["manual"],
+ visibility = ["//visibility:public"],
+)
+
+genrule(
+ name = "root_init_gen",
+ srcs = select({
+ "api_version_2": [":tf_python_api_gen_v2"],
+ "//conditions:default": [":tf_python_api_gen_v1"],
+ }),
+ outs = ["__init__.py"],
+ cmd = select({
+ "api_version_2": "cp $(@D)/_api/v2/__init__.py $(OUTS)",
+ "//conditions:default": "cp $(@D)/_api/v1/__init__.py $(OUTS)",
+ }),
+)
+
gen_api_init_files(
- name = "tensorflow_python_api_gen",
+ name = "tf_python_api_gen_v1",
srcs = ["api_template.__init__.py"],
api_version = 1,
+ output_dir = "_api/v1/",
+ output_files = TENSORFLOW_API_INIT_FILES_V1,
+ output_package = "tensorflow._api.v1",
+ root_init_template = "api_template.__init__.py",
+)
+
+gen_api_init_files(
+ name = "tf_python_api_gen_v2",
+ srcs = ["api_template.__init__.py"],
+ api_version = 2,
+ compat_api_versions = [1],
+ output_dir = "_api/v2/",
+ output_files = TENSORFLOW_API_INIT_FILES_V2,
+ output_package = "tensorflow._api.v2",
root_init_template = "api_template.__init__.py",
)
@@ -606,7 +593,10 @@ py_library(
py_library(
name = "tensorflow_py_no_contrib",
- srcs = [":tensorflow_python_api_gen"],
+ srcs = select({
+ "api_version_2": [":tf_python_api_gen_v2"],
+ "//conditions:default": [":tf_python_api_gen_v1"],
+ }) + [":root_init_gen"],
srcs_version = "PY2AND3",
visibility = ["//visibility:public"],
deps = ["//tensorflow/python:no_contrib"],
diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index 779f65d5b17c350833f67f07985b00e8eb561e72..2de740e145f93b151faf5c987808dbdf73fb4fd7 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -14,15 +14,16 @@
# ==============================================================================
"""Bring in all of the public TensorFlow interface into this module."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from __future__ import absolute_import as _absolute_import
+from __future__ import division as _division
+from __future__ import print_function as _print_function
+
+import os as _os
# pylint: disable=g-bad-import-order
from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import
try:
- import os # pylint: disable=g-import-not-at-top
# Add `estimator` attribute to allow access to estimator APIs via
# "tf.estimator..."
from tensorflow.python.estimator.api import estimator # pylint: disable=g-import-not-at-top
@@ -30,9 +31,8 @@ try:
# Add `estimator` to the __path__ to allow "from tensorflow.estimator..."
# style imports.
from tensorflow.python.estimator import api as estimator_api # pylint: disable=g-import-not-at-top
- __path__ += [os.path.dirname(estimator_api.__file__)]
+ __path__ += [_os.path.dirname(estimator_api.__file__)]
del estimator_api
- del os
except (ImportError, AttributeError):
print('tf.estimator package not installed.')
@@ -41,19 +41,32 @@ except (ImportError, AttributeError):
from tensorflow.python.util.lazy_loader import LazyLoader # pylint: disable=g-import-not-at-top
contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib')
del LazyLoader
+# The templated code that replaces the placeholder above sometimes
+# sets the __all__ variable. If it does, we have to be sure to add
+# "contrib".
+if '__all__' in vars():
+ vars()['__all__'].append('contrib')
from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top
app.flags = flags # pylint: disable=undefined-variable
-del absolute_import
-del division
-del print_function
+# Make sure directory containing top level submodules is in
+# the __path__ so that "from tensorflow.foo import bar" works.
+_tf_api_dir = _os.path.dirname(_os.path.dirname(app.__file__)) # pylint: disable=undefined-variable
+if _tf_api_dir not in __path__:
+ __path__.append(_tf_api_dir)
# These symbols appear because we import the python package which
# in turn imports from tensorflow.core and tensorflow.python. They
# must come from this module. So python adds these symbols for the
# resolution to succeed.
# pylint: disable=undefined-variable
-del python
-del core
+try:
+ del python
+ del core
+except NameError:
+ # Don't fail if these modules are not available.
+ # For e.g. we are using this file for compat.v1 module as well and
+ # 'python', 'core' directories are not under compat/v1.
+ pass
# pylint: enable=undefined-variable
diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 8a9301d584775cff3ae315e6fd856b00d1734248..17e2e292eb19029d279bc12a8328edadf96f1bb8 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -117,6 +117,7 @@ tf_cuda_library(
deps = [
":c_api",
":c_api_internal",
+ "//tensorflow/c/eager:c_api",
"//tensorflow/compiler/jit/legacy_flags:mark_for_compilation_pass_flags",
"//tensorflow/contrib/tpu:all_ops",
"//tensorflow/core:core_cpu",
@@ -127,6 +128,15 @@ tf_cuda_library(
],
)
+# Header-only target exposing c_api.h to every package under //tensorflow.
+cc_library(
+ name = "c_api_headers",
+ hdrs = [
+ "c_api.h",
+ ],
+ copts = tf_copts(),
+ visibility = ["//tensorflow:__subpackages__"],
+)
+
exports_files(
[
"version_script.lds",
@@ -194,6 +204,7 @@ tf_cuda_cc_test(
"//tensorflow:darwin": ["-headerpad_max_install_names"],
"//conditions:default": [],
}),
+ tags = ["noasan"],
# We must ensure that the dependencies can be dynamically linked since
# the shared library must be able to use core:framework.
# linkstatic = tf_kernel_tests_linkstatic(),
@@ -235,6 +246,7 @@ tf_cc_test(
":c_api_experimental",
":c_test_util",
"//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
],
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index b8adf6c1279e72d0c2056368253aa0cb470216e5..79811ceae57e0bddeb2a6f32bad7003e14e23422 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -39,6 +39,7 @@ limitations under the License.
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/partial_tensor_shape.h"
#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor.pb.h" // NOLINT
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/types.h"
@@ -1240,7 +1241,7 @@ void TF_SetAttrTypeList(TF_OperationDescription* desc, const char* attr_name,
void TF_SetAttrFuncName(TF_OperationDescription* desc, const char* attr_name,
const char* value, size_t length) {
tensorflow::NameAttrList func_name;
- func_name.set_name(std::string(value, value + length));
+ func_name.set_name(string(value, value + length));
desc->node_builder.Attr(attr_name, func_name);
}
@@ -2065,7 +2066,7 @@ static void GraphImportGraphDefLocked(TF_Graph* graph, const GraphDef& def,
for (int i = 0; i < size; ++i) {
TensorId id = results.missing_unused_input_map_keys[i];
- tf_results->missing_unused_key_names_data.push_back(std::string(id.first));
+ tf_results->missing_unused_key_names_data.emplace_back(id.first);
tf_results->missing_unused_key_names[i] =
tf_results->missing_unused_key_names_data.back().c_str();
tf_results->missing_unused_key_indexes[i] = id.second;
diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index 69b3ffe2a1f620e346405607ecf742fb863aa644..d4b78138e93624a7e41e917f8210281b500661bc 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -17,11 +17,13 @@ limitations under the License.
#include "tensorflow/c/c_api_internal.h"
#include "tensorflow/compiler/jit/legacy_flags/mark_for_compilation_pass_flags.h"
+#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/platform.h"
#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/core/protobuf/tensorflow_server.pb.h"
using tensorflow::FunctionDef;
using tensorflow::Node;
@@ -79,6 +81,18 @@ TF_Buffer* TF_CreateConfig(unsigned char enable_xla_compilation,
auto* gpu_options = config.mutable_gpu_options();
gpu_options->set_allow_growth(gpu_memory_allow_growth);
+ // TODO(b/113217601): This is needed for EagerContext::runner_ to use a
+ // threadpool, so that we avoid the possibility of running the runner_ in the
+ // threadpool of GPU event mgr, as that can trigger more callbacks to be
+ // scheduled on that same threadpool, causing a deadlock in cases where the
+ // caller of event_mgr->ThenExecute() blocks on the completion of the callback
+ // (as in the case of ConstOp kernel creation on GPU, which involves copying a
+ // CPU tensor to GPU).
+ // Setting a larger thread pool does not help with the Swift caller, as we use
+ // a different TFE context for each thread of execution (for running graph
+ // functions, and their send/recvs coroutines).
+ config.set_inter_op_parallelism_threads(1);
+
TF_Buffer* ret = TF_NewBuffer();
TF_CHECK_OK(MessageToBuffer(config, ret));
return ret;
@@ -8494,3 +8508,237 @@ void TF_EnqueueNamedTensor(TF_Session* session, int tensor_id,
/*run_metadata*/ nullptr, status);
VLOG(1) << "Enqueuing is done.";
}
+
+// Parses `text_proto` as a text-format tensorflow.ServerDef and returns the
+// message serialized into a buffer obtained from TF_NewBuffer(). When the text
+// cannot be parsed, `status` receives an Internal error and nullptr is
+// returned.
+TF_Buffer* TFE_GetServerDef(const char* text_proto, TF_Status* status) {
+  tensorflow::ServerDef server_def;
+  const bool parsed = tensorflow::protobuf::TextFormat::ParseFromString(
+      text_proto, &server_def);
+  if (parsed) {
+    status->status = tensorflow::Status();
+    TF_Buffer* serialized = TF_NewBuffer();
+    TF_CHECK_OK(MessageToBuffer(server_def, serialized));
+    return serialized;
+  }
+  status->status = tensorflow::errors::Internal(
+      "Invalid text proto for ServerDef: ", text_proto);
+  return nullptr;
+}
+
+// Builds an eager context on top of `session` via TFE_NewContextFromSession(),
+// using the options produced by TF_CreateConfig() (XLA disabled, GPU memory
+// growth enabled). Returns nullptr with `status` set when the options cannot
+// be applied or the context cannot be created.
+TFE_Context* TFE_CreateContextFromSession(TF_Session* session,
+                                          TF_Status* status) {
+  auto* opts = TFE_NewContextOptions();
+
+  // Reduce GPU memory allocation, and set appropriate config options for TFE
+  // context.
+  auto* config =
+      TF_CreateConfig(/*xla*/ false, /* gpu_memory_allow_growth */ true);
+  TFE_ContextOptionsSetConfig(opts, config->data, config->length, status);
+  if (!status->status.ok()) {
+    // `config` was dereferenced above and is therefore non-null: release it
+    // and report the error through `status` rather than aborting.
+    TF_DeleteBuffer(config);
+    TFE_DeleteContextOptions(opts);
+    return nullptr;
+  }
+
+  auto* ctx = TFE_NewContextFromSession(opts, session, status);
+  TF_DeleteBuffer(config);
+  TFE_DeleteContextOptions(opts);
+  return ctx;
+}
+
+// TODO: retrieve the device string via TFE_ContextListDevices()
+static const char DEFAULT_CPU_DEVICE[] =
+ "/job:localhost/replica:0/task:0/device:CPU:0";
+
+// Executes a FIFOQueueV2 op on DEFAULT_CPU_DEVICE with capacity 1, a single
+// component of type `inputType` and shared_name "fifo_queue_<tensor_id>", and
+// returns the resulting queue handle. Returns nullptr with `status` set on
+// failure.
+static TFE_TensorHandle* createTFEQueue(TFE_Context* ctx, TF_DataType inputType,
+                                        int tensor_id, TF_Status* status) {
+  std::unique_ptr<TFE_Op, decltype(&TFE_DeleteOp)> queueOp(
+      TFE_NewOp(ctx, "FIFOQueueV2", status), TFE_DeleteOp);
+  // Do not touch the op if it could not be created.
+  if (!status->status.ok()) return nullptr;
+  TFE_OpSetDevice(queueOp.get(), DEFAULT_CPU_DEVICE, status);
+  if (!status->status.ok()) return nullptr;
+  // TODO: use NAMED_TENSOR_QUEUE_CAPACITY in S4TF compiler.
+  TFE_OpSetAttrInt(queueOp.get(), "capacity", 1);
+  TFE_OpSetAttrTypeList(queueOp.get(), "component_types", &inputType, 1);
+  auto shared_name = tensorflow::strings::StrCat("fifo_queue_", tensor_id);
+  TFE_OpSetAttrString(queueOp.get(), "shared_name", shared_name.data(),
+                      shared_name.size());
+  TFE_OpSetAttrString(queueOp.get(), "container", "", 0);
+
+  // TODO: consider making this an unknown shape.
+  const int64_t* dims_ptr = nullptr;
+  int num_dims = 0;
+  TFE_OpSetAttrShapeList(queueOp.get(), "shapes", &dims_ptr, &num_dims,
+                         /*num_values*/ 0, status);
+  if (!status->status.ok()) return nullptr;
+
+  int num_retvals = 1;
+  TFE_TensorHandle* queue = nullptr;
+  TFE_Execute(queueOp.get(), &queue, &num_retvals, status);
+  if (!status->status.ok()) return nullptr;
+  CHECK_EQ(num_retvals, 1);
+
+  return queue;
+}
+
+// Executes a QueueEnqueueV2 op on DEFAULT_CPU_DEVICE that pushes `tensor`
+// (of type `inputType`) onto `queue`, with timeout_ms set to -1. On failure
+// the error is left in `status`.
+static void createTFEEnqueue(TFE_Context* ctx, TF_DataType inputType,
+                             TFE_TensorHandle* queue, TFE_TensorHandle* tensor,
+                             TF_Status* status) {
+  TFE_Op* op = TFE_NewOp(ctx, "QueueEnqueueV2", status);
+  if (!status->status.ok()) return;
+  // Deletes the op on every exit path below.
+  std::unique_ptr<TFE_Op, decltype(&TFE_DeleteOp)> op_deleter(op,
+                                                              TFE_DeleteOp);
+  TFE_OpSetDevice(op, DEFAULT_CPU_DEVICE, status);
+  if (!status->status.ok()) return;
+  TFE_OpAddInput(op, queue, status);
+  if (!status->status.ok()) return;
+  TFE_OpAddInput(op, tensor, status);
+  if (!status->status.ok()) return;
+  TFE_OpSetAttrTypeList(op, "Tcomponents", &inputType, 1);
+  TFE_OpSetAttrInt(op, "timeout_ms", -1);
+
+  int num_retvals = 0;
+  TFE_Execute(op, nullptr /*retvals*/, &num_retvals, status);
+  if (!status->status.ok()) return;
+  CHECK_EQ(num_retvals, 0);
+}
+
+// Executes a QueueDequeueV2 op on DEFAULT_CPU_DEVICE that pops one tensor of
+// type `inputType` from `queue`, with timeout_ms set to -1, and returns its
+// handle. Returns nullptr with `status` set on failure.
+static TFE_TensorHandle* createTFEDequeue(TFE_Context* ctx,
+                                          TF_DataType inputType,
+                                          TFE_TensorHandle* queue,
+                                          TF_Status* status) {
+  TFE_Op* op = TFE_NewOp(ctx, "QueueDequeueV2", status);
+  if (!status->status.ok()) return nullptr;
+  // Deletes the op on every exit path below.
+  std::unique_ptr<TFE_Op, decltype(&TFE_DeleteOp)> op_deleter(op,
+                                                              TFE_DeleteOp);
+  TFE_OpSetDevice(op, DEFAULT_CPU_DEVICE, status);
+  if (!status->status.ok()) return nullptr;
+
+  TFE_OpAddInput(op, queue, status);
+  if (!status->status.ok()) return nullptr;
+  TFE_OpSetAttrTypeList(op, "component_types", &inputType, 1);
+  TFE_OpSetAttrInt(op, "timeout_ms", -1);
+  TFE_TensorHandle* ret = nullptr;
+  int num_retvals = 1;
+  TFE_Execute(op, &ret, &num_retvals, status);
+  if (!status->status.ok()) return nullptr;
+  CHECK_EQ(num_retvals, 1);
+  return ret;
+}
+
+// Dequeues one tensor of type `inputType` from the FIFO queue identified by
+// `tensor_id`, using a temporary eager context created from `session` that is
+// deleted before returning. Returns nullptr with `status` set on failure.
+TFE_TensorHandle* TFE_DequeueNamedTensor(TF_Session* session, int tensor_id,
+                                         TF_DataType inputType,
+                                         TF_Status* status) {
+  assert(session);
+  VLOG(1) << "Dequeuing data tensor with id " << tensor_id;
+
+  auto ctx = TFE_CreateContextFromSession(session, status);
+  if (!status->status.ok()) return nullptr;
+  std::unique_ptr<TFE_Context, decltype(&TFE_DeleteContext)> ctx_deleter(
+      ctx, TFE_DeleteContext);
+
+  TFE_TensorHandle* queue = createTFEQueue(ctx, inputType, tensor_id, status);
+  if (!status->status.ok()) return nullptr;
+  std::unique_ptr<TFE_TensorHandle, decltype(&TFE_DeleteTensorHandle)>
+      queue_deleter(queue, TFE_DeleteTensorHandle);
+
+  auto* ret = createTFEDequeue(ctx, inputType, queue, status);
+  return ret;
+}
+
+// Dequeues one tensor of type `inputType` from the FIFO queue identified by
+// `tensor_id`, running the queue ops in the caller-provided `ctx`. Returns
+// nullptr with `status` set on failure.
+TFE_TensorHandle* TFE_DequeueNamedTensorFromCtx(TFE_Context* ctx, int tensor_id,
+                                                TF_DataType inputType,
+                                                TF_Status* status) {
+  TFE_TensorHandle* queue = createTFEQueue(ctx, inputType, tensor_id, status);
+  if (!status->status.ok()) return nullptr;
+  // The queue handle is only needed while the dequeue op runs.
+  std::unique_ptr<TFE_TensorHandle, decltype(&TFE_DeleteTensorHandle)>
+      queue_deleter(queue, TFE_DeleteTensorHandle);
+
+  auto* ret = createTFEDequeue(ctx, inputType, queue, status);
+
+  return ret;
+}
+
+// Enqueues `tensor` onto the FIFO queue identified by `tensor_id`, using a
+// temporary eager context created from `session` that is deleted before
+// returning. The queue's component type is taken from `tensor`. On failure
+// the error is left in `status`.
+void TFE_EnqueueNamedTensor(TF_Session* session, int tensor_id,
+                            TFE_TensorHandle* tensor, TF_Status* status) {
+  assert(session);
+  VLOG(1) << "Enqueuing data tensor with id " << tensor_id;
+
+  auto ctx = TFE_CreateContextFromSession(session, status);
+  if (!status->status.ok()) return;
+  std::unique_ptr<TFE_Context, decltype(&TFE_DeleteContext)> ctx_deleter(
+      ctx, TFE_DeleteContext);
+
+  TF_DataType inputType = TFE_TensorHandleDataType(tensor);
+  TFE_TensorHandle* queue = createTFEQueue(ctx, inputType, tensor_id, status);
+  if (!status->status.ok()) return;
+  std::unique_ptr<TFE_TensorHandle, decltype(&TFE_DeleteTensorHandle)>
+      queue_deleter(queue, TFE_DeleteTensorHandle);
+
+  createTFEEnqueue(ctx, inputType, queue, tensor, status);
+}
+
+// Enqueues `tensor` onto the FIFO queue identified by `tensor_id`, running the
+// queue ops in the caller-provided `ctx`. The queue's component type is taken
+// from `tensor`. On failure the error is left in `status`.
+void TFE_EnqueueNamedTensorFromCtx(TFE_Context* ctx, int tensor_id,
+                                   TFE_TensorHandle* tensor,
+                                   TF_Status* status) {
+  VLOG(1) << "Enqueuing data tensor with id " << tensor_id;
+
+  TF_DataType inputType = TFE_TensorHandleDataType(tensor);
+  TFE_TensorHandle* queue = createTFEQueue(ctx, inputType, tensor_id, status);
+  if (!status->status.ok()) return;
+  // The queue handle is only needed while the enqueue op runs.
+  std::unique_ptr<TFE_TensorHandle, decltype(&TFE_DeleteTensorHandle)>
+      queue_deleter(queue, TFE_DeleteTensorHandle);
+
+  createTFEEnqueue(ctx, inputType, queue, tensor, status);
+}
+
+// Enqueues `tensor` onto the TF_VARIANT FIFO queue identified by `tensor_id`,
+// using a temporary eager context created from `session` that is deleted
+// before returning. On failure the error is left in `status`.
+void TFE_EnqueueVariantTensor(TF_Session* session, int tensor_id,
+                              TFE_TensorHandle* tensor, TF_Status* status) {
+  VLOG(1) << "Enqueuing variant tensor with id " << tensor_id;
+
+  auto ctx = TFE_CreateContextFromSession(session, status);
+  if (!status->status.ok()) return;
+  std::unique_ptr<TFE_Context, decltype(&TFE_DeleteContext)> ctx_deleter(
+      ctx, TFE_DeleteContext);
+
+  TFE_TensorHandle* queue = createTFEQueue(ctx, TF_VARIANT, tensor_id, status);
+  if (!status->status.ok()) return;
+  std::unique_ptr<TFE_TensorHandle, decltype(&TFE_DeleteTensorHandle)>
+      queue_deleter(queue, TFE_DeleteTensorHandle);
+
+  createTFEEnqueue(ctx, TF_VARIANT, queue, tensor, status);
+}
+
+// Dequeues one tensor from the TF_VARIANT FIFO queue identified by
+// `tensor_id`, using a temporary eager context created from `session` that is
+// deleted before returning. Returns nullptr with `status` set on failure.
+TFE_TensorHandle* TFE_DequeueVariantTensor(TF_Session* session, int tensor_id,
+                                           TF_Status* status) {
+  VLOG(1) << "Dequeuing variant tensor with id " << tensor_id;
+
+  auto ctx = TFE_CreateContextFromSession(session, status);
+  if (!status->status.ok()) return nullptr;
+  std::unique_ptr<TFE_Context, decltype(&TFE_DeleteContext)> ctx_deleter(
+      ctx, TFE_DeleteContext);
+
+  TFE_TensorHandle* queue = createTFEQueue(ctx, TF_VARIANT, tensor_id, status);
+  if (!status->status.ok()) return nullptr;
+  std::unique_ptr<TFE_TensorHandle, decltype(&TFE_DeleteTensorHandle)>
+      queue_deleter(queue, TFE_DeleteTensorHandle);
+
+  return createTFEDequeue(ctx, TF_VARIANT, queue, status);
+}
+
+// CHECK-fails, logging the status message, unless `status` holds TF_OK.
+static void CheckOk(TF_Status* status) {
+ CHECK_EQ(TF_GetCode(status), TF_OK) << TF_Message(status);
+}
+
+// Resolves `handle` to a tensor and logs its DebugString() at INFO level.
+// CHECK-fails if the handle cannot be resolved or converted.
+void TFE_TensorHandlePrintDebugString(TFE_TensorHandle* handle) {
+  auto* status = TF_NewStatus();
+  TF_Tensor* t = TFE_TensorHandleResolve(handle, status);
+  // Reuse the file-local helper instead of repeating the same check inline.
+  CheckOk(status);
+
+  tensorflow::Tensor dst;
+  TF_CHECK_OK(TF_TensorToTensor(t, &dst));
+  LOG(INFO) << dst.DebugString();
+
+  TF_DeleteTensor(t);
+  TF_DeleteStatus(status);
+}
+
+// Overwrites `status` with an Internal error whose message is `errMsg`.
+TF_CAPI_EXPORT extern void TF_MakeInternalErrorStatus(TF_Status* status,
+ const char* errMsg) {
+ status->status = tensorflow::errors::Internal(errMsg);
+}
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index 6617c5a572e90e78369f73d714f39942f213040f..d98d532e32e891e21f5b7ba360c74c3256fb1947 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -20,6 +20,7 @@ limitations under the License.
#include
#include "tensorflow/c/c_api.h"
+#include "tensorflow/c/eager/c_api.h"
// --------------------------------------------------------------------------
// Experimental C API for TensorFlow.
@@ -130,6 +131,57 @@ TF_CAPI_EXPORT extern void TF_EnqueueNamedTensor(TF_Session* session,
int tensor_id,
TF_Tensor* tensor,
TF_Status* status);
+// Parses `text_proto`, a tensorflow.ServerDef in protobuf text format, and
+// returns it serialized into a newly allocated TF_Buffer. If the text cannot
+// be parsed, returns nullptr and sets an error in `status`.
+TF_CAPI_EXPORT extern TF_Buffer* TFE_GetServerDef(const char* text_proto,
+                                                  TF_Status* status);
+
+// TODO: remove this API in favor of the next one.
+TF_CAPI_EXPORT extern TFE_Context* TFE_NewContextFromSession(
+ const TFE_ContextOptions* opts, TF_Session* sess, TF_Status* status);
+
+// Creates from `session` a new eager context to run a graph function or
+// sends/recvs, so that these concurrent TFE executions can share (via
+// `session` and its associated device mgr) the same set of fifo queue resource
+// ops, used for host<->TF tensor transfers. This way the sends/recvs calls and
+// graph function execution can access the same fifo queue resource handles
+// (associated with devices managed by the device manager, which can be obtained
+// from `session`).
+//
+// TODO: Remove this function once we migrate away from using session.
+TF_CAPI_EXPORT extern TFE_Context* TFE_CreateContextFromSession(
+ TF_Session* session, TF_Status* status);
+
+// TODO: Retire this API in favor of the next one.
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_DequeueNamedTensor(
+ TF_Session* session, int tensor_id, TF_DataType inputType,
+ TF_Status* status);
+
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_DequeueNamedTensorFromCtx(
+ TFE_Context* ctx, int tensor_id, TF_DataType inputType, TF_Status* status);
+
+TF_CAPI_EXPORT extern void TFE_EnqueueNamedTensor(TF_Session* session,
+ int tensor_id,
+ TFE_TensorHandle* tensor,
+ TF_Status* status);
+
+TF_CAPI_EXPORT extern void TFE_EnqueueNamedTensorFromCtx(
+ TFE_Context* ctx, int tensor_id, TFE_TensorHandle* tensor,
+ TF_Status* status);
+
+// TODO: consider folding the 2 APIs below into the ones above.
+TF_CAPI_EXPORT extern void TFE_EnqueueVariantTensor(TF_Session* session,
+ int tensor_id,
+ TFE_TensorHandle* tensor,
+ TF_Status* status);
+
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_DequeueVariantTensor(
+ TF_Session* session, int tensor_id, TF_Status* status);
+
+// Logs `handle` in a human readable format (via LOG(INFO)) for debugging.
+TF_CAPI_EXPORT extern void TFE_TensorHandlePrintDebugString(
+ TFE_TensorHandle* handle);
+
+TF_CAPI_EXPORT extern void TF_MakeInternalErrorStatus(TF_Status* status,
+ const char* errMsg);
#ifdef __cplusplus
} /* end extern "C" */
diff --git a/tensorflow/c/c_api_experimental_test.cc b/tensorflow/c/c_api_experimental_test.cc
index 30fcfd401d9d634962d64aaa3bf348de91f2ecae..c6effd39697e0397278770b53e98508074f99862 100644
--- a/tensorflow/c/c_api_experimental_test.cc
+++ b/tensorflow/c/c_api_experimental_test.cc
@@ -16,8 +16,10 @@ limitations under the License.
#include "tensorflow/c/c_api_experimental.h"
#include "tensorflow/c/c_test_util.h"
#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/tensorflow_server.pb.h"
namespace tensorflow {
namespace {
@@ -116,5 +118,49 @@ TEST(CAPI_EXPERIMENTAL, ImagenetIteratorGetNext) {
TF_DeleteStatus(s);
}
+// Round-trips a text-format ServerDef through TFE_GetServerDef() and checks
+// that malformed input yields a null buffer and an error status.
+TEST(CAPI_EXPERIMENTAL, GetServerDefTest) {
+  // Must match TextFormat::PrintToString() output exactly, including the
+  // two-space indentation per nesting level.
+  const string expected_text_proto(R"(cluster {
+  job {
+    name: "worker"
+    tasks {
+      key: 0
+      value: "tpuserver:0"
+    }
+    tasks {
+      key: 1
+      value: "localhost:1"
+    }
+  }
+}
+job_name: "worker"
+task_index: 1
+protocol: "grpc"
+)");
+
+  TF_Status* status = TF_NewStatus();
+  TF_Buffer* result = TFE_GetServerDef(expected_text_proto.c_str(), status);
+  // `result` is dereferenced below, so a failure here must stop the test.
+  ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status);
+  ASSERT_TRUE(result != nullptr);
+
+  ServerDef actual;
+  ASSERT_TRUE(actual.ParseFromArray(result->data, result->length));
+  string actual_text_proto;
+  tensorflow::protobuf::TextFormat::PrintToString(actual, &actual_text_proto);
+  EXPECT_EQ(expected_text_proto, actual_text_proto);
+
+  const string malformed_text_proto(R"(cluster {
+  job {
+    name: "worker")");
+  TF_Buffer* null_result =
+      TFE_GetServerDef(malformed_text_proto.c_str(), status);
+  EXPECT_NE(TF_GetCode(status), TF_OK);
+  EXPECT_TRUE(tensorflow::str_util::StrContains(
+      TF_Message(status), "Invalid text proto for ServerDef"));
+  EXPECT_EQ(null_result, nullptr);
+
+  // Cleanup
+  TF_DeleteBuffer(result);
+  TF_DeleteStatus(status);
+}
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index a2c5a42c11361779de61b515e0f08dcc45e609b9..f68f8a3e90a971b5e4a024feaf26ba498afc48da 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/core/framework/function.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/tensor.pb.h" // NOLINT
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/lib/strings/base64.h"
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index aa2a537f03be31ae45ff3d6f7815b449d661cf9c..03516c39dc970aa23967107d3a0446da94669465 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -259,8 +259,8 @@ TEST(CAPI, DeprecatedSession) {
TF_Run(session, run_options, nullptr, nullptr, 0, nullptr, nullptr, 0,
nullptr, 0, run_metadata, s);
EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s)) << TF_Message(s);
- EXPECT_EQ(std::string("Session was not created with a graph before Run()!"),
- std::string(TF_Message(s)));
+ EXPECT_EQ("Session was not created with a graph before Run()!",
+ string(TF_Message(s)));
TF_DeleteBuffer(run_metadata);
TF_DeleteBuffer(run_options);
@@ -1224,8 +1224,8 @@ class CApiColocationTest : public ::testing::Test {
TF_OperationGetAttrMetadata(op, tensorflow::kColocationAttrName, s_);
if (expected.empty()) {
ASSERT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)) << TF_Message(s_);
- EXPECT_EQ(std::string("Operation 'add' has no attr named '_class'."),
- std::string(TF_Message(s_)));
+ EXPECT_EQ("Operation 'add' has no attr named '_class'.",
+ string(TF_Message(s_)));
return;
}
EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
@@ -1369,16 +1369,16 @@ TEST(CAPI, SavedModel) {
input.flat()(i) = example.SerializeAsString();
}
- const tensorflow::string input_op_name =
- std::string(tensorflow::ParseTensorName(input_name).first);
+ const tensorflow::string input_op_name(
+ tensorflow::ParseTensorName(input_name).first);
TF_Operation* input_op =
TF_GraphOperationByName(graph, input_op_name.c_str());
ASSERT_TRUE(input_op != nullptr);
csession.SetInputs({{input_op, TF_TensorFromTensor(input, s)}});
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
- const tensorflow::string output_op_name =
- std::string(tensorflow::ParseTensorName(output_name).first);
+ const tensorflow::string output_op_name(
+ tensorflow::ParseTensorName(output_name).first);
TF_Operation* output_op =
TF_GraphOperationByName(graph, output_op_name.c_str());
ASSERT_TRUE(output_op != nullptr);
diff --git a/tensorflow/c/checkpoint_reader.cc b/tensorflow/c/checkpoint_reader.cc
index 74bc25a491ac01cb725d1c004197e48727c30230..d3311f0cd06f2b151c3567735eb41b5baf72e102 100644
--- a/tensorflow/c/checkpoint_reader.cc
+++ b/tensorflow/c/checkpoint_reader.cc
@@ -125,7 +125,7 @@ CheckpointReader::BuildV2VarMaps() {
const auto& slice_proto = entry.slices(i);
CHECK(filtered_keys
.insert(EncodeTensorNameSlice(
- std::string(v2_reader_->key()) /* full var's name */,
+ string(v2_reader_->key()) /* full var's name */,
TensorSlice(slice_proto)))
.second);
}
@@ -138,11 +138,11 @@ CheckpointReader::BuildV2VarMaps() {
new TensorSliceReader::VarToDataTypeMap);
v2_reader_->Seek(kHeaderEntryKey);
for (v2_reader_->Next(); v2_reader_->Valid(); v2_reader_->Next()) {
- if (filtered_keys.count(std::string(v2_reader_->key())) > 0) continue;
+ if (filtered_keys.count(string(v2_reader_->key())) > 0) continue;
CHECK(entry.ParseFromArray(v2_reader_->value().data(),
v2_reader_->value().size()))
<< entry.InitializationErrorString();
- string key = std::string(v2_reader_->key());
+ string key(v2_reader_->key());
(*var_to_shape_map)[key] = TensorShape(entry.shape());
(*var_to_data_type_map)[key] = DataType(entry.dtype());
}
diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index 37be52f57d865c1e59611540d5dab04b59e89444..3ee31a6a7ac641bbd3fc4c05568b61e433a1d523 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -68,7 +68,10 @@ tf_cuda_library(
tf_cuda_library(
name = "c_api_internal",
hdrs = ["c_api_internal.h"],
- visibility = ["//tensorflow:internal"],
+ visibility = [
+ "//learning/deepmind/courier:__pkg__",
+ "//tensorflow:internal",
+ ],
deps = [
":c_api",
"//tensorflow/c:c_api",
diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
old mode 100644
new mode 100755
index dfb1c9a37644c726e1eabab775593596d5b556b9..3554ec0bf3202b54bfc38d67e51b89df19832302
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -244,8 +244,8 @@ void TFE_ContextOptionsSetConfig(TFE_ContextOptions* options, const void* proto,
}
void TFE_ContextOptionsSetAsync(TFE_ContextOptions* options,
- unsigned char async) {
- options->async = async;
+ unsigned char enable) {
+ options->async = enable;
}
void TFE_ContextOptionsSetDevicePlacementPolicy(
TFE_ContextOptions* options, TFE_ContextDevicePlacementPolicy policy) {
@@ -253,9 +253,9 @@ void TFE_ContextOptionsSetDevicePlacementPolicy(
}
TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx,
- unsigned char async,
+ unsigned char enable,
TF_Status* status) {
- status->status = ctx->context.SetAsyncForThread(async);
+ status->status = ctx->context.SetAsyncForThread(enable);
}
void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; }
@@ -273,7 +273,20 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) {
new tensorflow::IntraProcessRendezvous(device_mgr.get());
return new TFE_Context(opts->session_options.options, opts->policy,
- opts->async, std::move(device_mgr), r);
+ opts->async, device_mgr.release(),
+ /*device_mgr_owned*/ true, r);
+}
+
+// Creates an eager context that uses the local device manager of `sess`. The
+// device manager is passed to TFE_Context with device_mgr_owned set to false.
+// Returns nullptr with `status` set if the device manager cannot be obtained.
+TFE_Context* TFE_NewContextFromSession(const TFE_ContextOptions* opts,
+                                       TF_Session* sess, TF_Status* status) {
+  const tensorflow::DeviceMgr* session_device_mgr = nullptr;
+  status->status = sess->session->LocalDeviceManager(&session_device_mgr);
+  if (!status->status.ok()) {
+    return nullptr;
+  }
+  tensorflow::Rendezvous* rendezvous =
+      new tensorflow::IntraProcessRendezvous(session_device_mgr);
+  return new TFE_Context(opts->session_options.options, opts->policy,
+                         opts->async, session_device_mgr,
+                         /*device_mgr_owned*/ false, rendezvous);
+}
void TFE_DeleteContext(TFE_Context* ctx) { delete ctx; }
@@ -362,6 +375,17 @@ int TFE_TensorHandleNumDims(TFE_TensorHandle* h, TF_Status* status) {
return result;
}
+// Returns the number of elements of the tensor behind `h`. Returns -1 with
+// `status` set to an error when `h` is null or the count cannot be obtained.
+int64_t TFE_TensorHandleNumElements(TFE_TensorHandle* h, TF_Status* status) {
+  if (h == nullptr || h->handle == nullptr) {
+    status->status = tensorflow::errors::InvalidArgument(
+        "The passed in handle is a nullptr");
+    return -1;
+  }
+  // Initialized so that a failing NumElements() never leaks an indeterminate
+  // value to the caller.
+  tensorflow::int64 result = -1;
+  status->status = h->handle->NumElements(&result);
+  return status->status.ok() ? result : -1;
+}
+
int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index,
TF_Status* status) {
if (h == nullptr || h->handle == nullptr) {
@@ -386,6 +410,19 @@ const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h, TF_Status* status) {
: d->name().c_str();
}
+// Returns a new TFE_TensorHandle wrapping the same underlying handle as `h`,
+// after taking an additional reference on it. Sets `status` to OK on success;
+// returns nullptr with an InvalidArgument status when `h` is null.
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_TensorHandleCopySharingTensor(
+    TFE_TensorHandle* h, TF_Status* status) {
+  if (h == nullptr || h->handle == nullptr) {
+    status->status = tensorflow::errors::InvalidArgument(
+        "The passed in handle is a nullptr");
+    return nullptr;
+  }
+
+  // The header documents that `status` is OK on success, so clear any stale
+  // error the caller may have left in it.
+  status->status = tensorflow::Status();
+
+  h->handle->Ref();
+
+  return new TFE_TensorHandle(h->handle);
+}
+
TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) {
if (h == nullptr || h->handle == nullptr) {
status->status = tensorflow::errors::InvalidArgument(
@@ -541,6 +578,21 @@ void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name,
op->operation.MutableAttrs()->Set(attr_name, attr_value);
}
+// Sets attribute `attr_name` of `op` to a function attribute whose name is
+// the `length` bytes starting at `data`.
+void TFE_OpSetAttrFunctionName(TFE_Op* op, const char* attr_name,
+                               const char* data, size_t length) {
+  tensorflow::AttrValue function_attr;
+  function_attr.mutable_func()->set_name(data, length);
+  op->operation.MutableAttrs()->Set(attr_name, function_attr);
+}
+
+// Converts `tensor` with TF_TensorToTensor() and, if that succeeds, stores the
+// result as attribute `attr_name` of `op`. The conversion result is always
+// written to `status`.
+void TFE_OpSetAttrTensor(TFE_Op* op, const char* attr_name, TF_Tensor* tensor,
+                         TF_Status* status) {
+  tensorflow::Tensor converted;
+  status->status = TF_TensorToTensor(tensor, &converted);
+  if (!status->status.ok()) {
+    return;
+  }
+  op->operation.MutableAttrs()->Set(attr_name, converted);
+}
+
void TFE_OpSetAttrStringList(TFE_Op* op, const char* attr_name,
const void* const* values, const size_t* lengths,
int num_values) {
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
old mode 100644
new mode 100755
index a0ebc6fa0a22ed61be91c2974352c2988fb4cd92..b2454d872207e26feb3764671474a5d87c01f84d
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -76,7 +76,7 @@ typedef enum TFE_ContextDevicePlacementPolicy {
// Sets the default execution mode (sync/async). Note that this can be
// overridden per thread using TFE_ContextSetAsyncForThread.
TF_CAPI_EXPORT extern void TFE_ContextOptionsSetAsync(TFE_ContextOptions*,
- unsigned char async);
+ unsigned char enable);
TF_CAPI_EXPORT extern void TFE_ContextOptionsSetDevicePlacementPolicy(
TFE_ContextOptions*, TFE_ContextDevicePlacementPolicy);
@@ -114,7 +114,7 @@ TFE_ContextGetDevicePlacementPolicy(TFE_Context*);
// Overrides the execution mode (sync/async) for the current thread.
TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context*,
- unsigned char async,
+ unsigned char enable,
TF_Status* status);
// A tensorflow.ServerDef specifies remote workers (in addition to the current
@@ -163,6 +163,8 @@ TF_CAPI_EXPORT extern TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h);
// This function will block till the operation that produces `h` has completed.
TF_CAPI_EXPORT extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h,
TF_Status* status);
+// Returns the number of elements of the tensor referred to by `h`. If `h` is
+// null, returns -1 and sets `status` to an InvalidArgument error.
+TF_CAPI_EXPORT extern int64_t TFE_TensorHandleNumElements(TFE_TensorHandle* h,
+ TF_Status* status);
// This function will block till the operation that produces `h` has completed.
TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h,
int dim_index,
@@ -171,6 +173,12 @@ TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h,
TF_CAPI_EXPORT extern const char* TFE_TensorHandleDeviceName(
TFE_TensorHandle* h, TF_Status* status);
+// Return a pointer to a new TFE_TensorHandle that shares the underlying tensor
+// with `h`. On success, `status` is set to OK. On failure, `status` reflects
+// the error and a nullptr is returned.
+TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_TensorHandleCopySharingTensor(
+ TFE_TensorHandle* h, TF_Status* status);
+
// This function will block till the operation that produces `h` has
// completed. The memory returned might alias the internal memory used by
// TensorFlow. Hence, callers should not mutate this memory (for example by
@@ -305,6 +313,14 @@ TF_CAPI_EXPORT extern void TFE_OpSetAttrFunction(TFE_Op* op,
const char* attr_name,
const TFE_Op* value);
+TF_CAPI_EXPORT void TFE_OpSetAttrFunctionName(TFE_Op* op, const char* attr_name,
+ const char* data, size_t length);
+
+TF_CAPI_EXPORT extern void TFE_OpSetAttrTensor(TFE_Op* op,
+ const char* attr_name,
+ TF_Tensor* tensor,
+ TF_Status* status);
+
TF_CAPI_EXPORT extern void TFE_OpSetAttrStringList(TFE_Op* op,
const char* attr_name,
const void* const* values,
diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h
index a5c0681e2e4eddae08954d9d0178ca96a3f8f29a..104d52430cf7aa14d4d2a335a1b96e667f21ce87 100644
--- a/tensorflow/c/eager/c_api_internal.h
+++ b/tensorflow/c/eager/c_api_internal.h
@@ -62,15 +62,14 @@ struct TFE_ContextOptions {
};
struct TFE_Context {
- explicit TFE_Context(const tensorflow::SessionOptions& opts,
- TFE_ContextDevicePlacementPolicy default_policy,
- bool async,
- std::unique_ptr device_mgr,
- tensorflow::Rendezvous* rendezvous)
+ TFE_Context(const tensorflow::SessionOptions& opts,
+ TFE_ContextDevicePlacementPolicy default_policy, bool async,
+ const tensorflow::DeviceMgr* device_mgr, bool device_mgr_owned,
+ tensorflow::Rendezvous* rendezvous)
: context(opts,
static_cast(
default_policy),
- async, std::move(device_mgr), rendezvous) {}
+ async, device_mgr, device_mgr_owned, rendezvous) {}
tensorflow::EagerContext context;
};
diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc
index 7126227cf529023eadf38984668a40118641bb1b..55331022b9dbd0696928fa44430f340f371432ac 100644
--- a/tensorflow/c/eager/c_api_test.cc
+++ b/tensorflow/c/eager/c_api_test.cc
@@ -1528,4 +1528,29 @@ TEST(CAPI, StringAttributes) {
TFE_DeleteContext(ctx);
TF_DeleteStatus(status);
}
+
+TEST(CAPI, TestTFE_TensorHandleCopySharingUnderlyingTensorHandle) {
+ TFE_TensorHandle* h = TestMatrixTensorHandle();
+ EXPECT_EQ(TF_FLOAT, TFE_TensorHandleDataType(h));
+
+ std::unique_ptr status(
+ TF_NewStatus(), TF_DeleteStatus);
+
+ TFE_TensorHandle* h_shares_tensor =
+ TFE_TensorHandleCopySharingTensor(h, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ TF_Tensor* t = TFE_TensorHandleResolve(h_shares_tensor, status.get());
+ ASSERT_EQ(16, TF_TensorByteSize(t));
+ float data[4] = {0};
+ memcpy(&data[0], TF_TensorData(t), TF_TensorByteSize(t));
+ EXPECT_EQ(1.0, data[0]);
+ EXPECT_EQ(2.0, data[1]);
+ EXPECT_EQ(3.0, data[2]);
+ EXPECT_EQ(4.0, data[3]);
+ TF_DeleteTensor(t);
+
+ TFE_DeleteTensorHandle(h);
+ TFE_DeleteTensorHandle(h_shares_tensor);
+}
} // namespace
diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc
index 5607c9dcb0bbec72b2f86def3dd4e6590d73197b..008f088c2dcdd7d9114103516a4702e47a55c6de 100644
--- a/tensorflow/c/eager/c_api_test_util.cc
+++ b/tensorflow/c/eager/c_api_test_util.cc
@@ -99,8 +99,6 @@ TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) {
TFE_OpAddInput(op, b, status);
CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
TF_DeleteStatus(status);
- TFE_OpSetAttrBool(op, "transpose_a", 0);
- TFE_OpSetAttrBool(op, "transpose_b", 0);
TFE_OpSetAttrType(op, "T", TFE_TensorHandleDataType(a));
return op;
diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 1adb0458c35193117b5fa5cfe9ceffbaaf699af7..5ba55a203ff70cc64c07e96b5a869a1f11c9334e 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -29,15 +29,8 @@ limitations under the License.
namespace tensorflow {
namespace eager {
-// Information about a tensor.
-struct TapeTensor {
- int64 id; // Expected to be unique in the lifetime of this process.
- DataType dtype;
- TensorShape shape;
-};
-
// Represents an entry in the tape.
-template
+template
struct OpTapeEntry {
string op_type;
std::vector output_tensor_info;
@@ -57,8 +50,8 @@ struct OpTapeEntry {
using TensorTape = gtl::FlatMap;
// Map from operation-id to tape entry.
-template
-using OpTape = gtl::FlatMap>;
+template
+using OpTape = gtl::FlatMap>;
// Operations the tape needs to perform on tensors to do backpropagation. Named
// "vspace" because a subset of these are related to a vector space, such as
@@ -79,7 +72,7 @@ using OpTape = gtl::FlatMap>;
// TODO(apassos) provide concrete template instantiations for TFE_TensorHandle
// specialization, which is blocked by quite a few things needing to loop back
// into python now.
-template
+template
class VSpace {
public:
virtual ~VSpace() {}
@@ -93,10 +86,10 @@ class VSpace {
gtl::ArraySlice gradient_tensors) const = 0;
// Returns a tensor of the right shape and dtype filled with zeros.
- virtual Gradient* Zeros(TensorShape shape, DataType dtype) const = 0;
+ virtual Gradient* Zeros(const TapeTensor& tensor) const = 0;
// Returns a Tensor which is filled with ones and like the input.
- virtual Gradient* Ones(TensorShape shape, DataType dtype) const = 0;
+ virtual Gradient* Ones(const TapeTensor& tensor) const = 0;
// Calls the passed-in backward function.
virtual Status CallBackwardFunction(
@@ -114,7 +107,7 @@ class VSpace {
// Traces the execution of operations, doing eager garbage collection, and
// exporting a full trace so other code can do backpropagation. Not thread-safe.
-template
+template
class GradientTape {
public:
// If `persistent` is true, GradientTape will not eagerly delete backward
@@ -134,10 +127,10 @@ class GradientTape {
void Watch(int64 tensor_id);
void RecordOperation(
- const string& op_type, gtl::ArraySlice output_tensors,
+ const string& op_type, std::vector& output_tensors,
gtl::ArraySlice input_tensor_id,
gtl::ArraySlice input_dtypes,
- BackwardFunction* backward_function,
+ const std::function& backward_function_getter,
const std::function& backward_function_deleter);
void DeleteTrace(int64 tensor_id);
@@ -146,17 +139,18 @@ class GradientTape {
// once) and produces the gradient of the target tensors with respect to the
// source tensors. The output gradients are used if not empty and not
// null. The result is populated with one tensor per target element.
- Status ComputeGradient(const VSpace& vspace,
- gtl::ArraySlice target_tensor_ids,
- gtl::ArraySlice source_tensor_id,
- gtl::ArraySlice output_gradients,
- std::vector* result);
+ Status ComputeGradient(
+ const VSpace& vspace,
+ gtl::ArraySlice target_tensor_ids,
+ gtl::ArraySlice source_tensor_id,
+ gtl::ArraySlice output_gradients,
+ std::vector* result);
bool IsPersistent() const { return persistent_; }
private:
TensorTape tensor_tape_;
- OpTape op_tape_;
+ OpTape op_tape_;
int64 next_op_id_{0};
// Map from tensor id to number of remaining usages (i.e. how many entries in
@@ -186,8 +180,8 @@ inline bool IsDtypeTrainable(DataType dtype) {
}
}
-template
-bool GradientTape::ShouldRecord(
+template
+bool GradientTape::ShouldRecord(
gtl::ArraySlice tensor_ids,
gtl::ArraySlice dtypes) {
CHECK_EQ(tensor_ids.size(), dtypes.size());
@@ -201,20 +195,20 @@ bool GradientTape::ShouldRecord(
return false;
}
-template
-void GradientTape::Watch(int64 tensor_id) {
+template
+void GradientTape::Watch(
+ int64 tensor_id) {
tensor_tape_.emplace(tensor_id, -1);
}
-template
-void GradientTape::RecordOperation(
- const string& op_type, gtl::ArraySlice output_tensors,
+template
+void GradientTape::RecordOperation(
+ const string& op_type, std::vector& output_tensors,
gtl::ArraySlice input_tensor_id,
gtl::ArraySlice input_dtypes,
- BackwardFunction* backward_function,
+ const std::function& backward_function_getter,
const std::function& backward_function_deleter) {
if (!ShouldRecord(input_tensor_id, input_dtypes)) {
- backward_function_deleter(backward_function);
return;
}
std::vector ids;
@@ -229,16 +223,18 @@ void GradientTape::RecordOperation(
for (const TapeTensor& o : output_tensors) {
// Note: the tensor can have already been watched and hence be in the tape,
// so we cannot check that we're inserting it here.
- tensor_tape_[o.id] = op_id;
- tensor_usage_[o.id] = 1;
+ tensor_tape_[o.GetID()] = op_id;
+ tensor_usage_[o.GetID()] = 1;
tensors.push_back(o);
}
- op_tape_[op_id] = OpTapeEntry{
- op_type, tensors, ids, backward_function, backward_function_deleter};
+ op_tape_[op_id] = OpTapeEntry{
+ op_type, std::move(tensors), std::move(ids), backward_function_getter(),
+ backward_function_deleter};
}
-template
-void GradientTape::DeleteTrace(int64 tensor_id) {
+template
+void GradientTape::DeleteTrace(
+ int64 tensor_id) {
auto it = tensor_usage_.find(tensor_id);
if (it == tensor_usage_.end()) {
return;
@@ -261,7 +257,7 @@ void GradientTape::DeleteTrace(int64 tensor_id) {
auto op_it = op_tape_.find(op_id);
CHECK(op_it != op_tape_.end());
for (const auto& output : op_it->second.output_tensor_info) {
- if (tensor_usage_.find(output.id) != tensor_usage_.end()) {
+ if (tensor_usage_.find(output.GetID()) != tensor_usage_.end()) {
// Found a usage for an output, so cannot delete the op.
return;
}
@@ -304,9 +300,9 @@ void GradientTape::DeleteTrace(int64 tensor_id) {
namespace {
-template
+template
struct BackpropInitialState {
- OpTape op_tape;
+ OpTape op_tape;
// Map from tensor ID to how many references still exist for this tensor in
// the tape.
@@ -322,17 +318,17 @@ struct BackpropInitialState {
// If `persistent_tape` is false, op_tape is cleared and backwards functions
// not needed for gradient computation are deleted. Backwards functions that
// are needed, are copied and returned in BackpropInitialState.
-template
-BackpropInitialState PrepareBackprop(
+template
+BackpropInitialState PrepareBackprop(
gtl::ArraySlice target, const TensorTape& tensor_tape,
- OpTape* op_tape, const gtl::FlatSet& sources_set,
- bool persistent_tape) {
+ OpTape* op_tape,
+ const gtl::FlatSet& sources_set, bool persistent_tape) {
std::vector tensor_stack;
tensor_stack.reserve(target.size());
for (auto t : target) {
tensor_stack.push_back(t);
}
- BackpropInitialState result;
+ BackpropInitialState result;
while (!tensor_stack.empty()) {
int64 tensor_id = tensor_stack.back();
tensor_stack.pop_back();
@@ -383,9 +379,9 @@ BackpropInitialState PrepareBackprop(
return result;
}
-template
+template
std::vector InitialStack(
- const OpTape& op_tape,
+ const OpTape& op_tape,
const gtl::FlatMap& op_missing_tensor) {
std::vector result;
for (auto& op_entry : op_tape) {
@@ -396,13 +392,13 @@ std::vector InitialStack(
return result;
}
-template
-Status InitialGradients(const VSpace& vspace,
- gtl::ArraySlice target_tensor_ids,
- gtl::ArraySlice output_gradients,
- const TensorTape& tensor_tape,
- const OpTape& op_tape,
- gtl::FlatMap>* result) {
+template
+Status InitialGradients(
+ const VSpace& vspace,
+ gtl::ArraySlice target_tensor_ids,
+ gtl::ArraySlice output_gradients, const TensorTape& tensor_tape,
+ const OpTape& op_tape,
+ gtl::FlatMap>* result) {
for (int i = 0; i < target_tensor_ids.size(); ++i) {
const int64 id = target_tensor_ids[i];
if (output_gradients.empty() || output_gradients[i] == nullptr) {
@@ -416,11 +412,10 @@ Status InitialGradients(const VSpace& vspace,
}
bool found = false;
for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) {
- if (op_it->second.output_tensor_info[j].id == id) {
+ if (op_it->second.output_tensor_info[j].GetID() == id) {
found = true;
(*result)[id].push_back(
- vspace.Ones(op_it->second.output_tensor_info[j].shape,
- op_it->second.output_tensor_info[j].dtype));
+ vspace.Ones(op_it->second.output_tensor_info[j]));
break;
}
}
@@ -440,6 +435,27 @@ Status InitialGradients(const VSpace& vspace,
return Status::OK();
}
+// TODO(agarwal): use an automatic mechanism for handling None arguments to
+// gradient functions.
+//
+// Some gradient functions can accept None arguments for gradients. The
+// following maps the operation name to the indices at which the corresponding
+// gradient function can accept None values. e.g. FusedBatchNorm outputs 5
+// values and hence receives 5 gradient values during backprop. However the
+// gradient function uses only the first of those values and ignores the rest.
+// The entry, "FusedBatchNorm": [1, 2, 3, 4], indicates that only the gradient
+// corresponding to index 0 is used, and the gradient values at indices 1-4 are
+// ignored (and hence can be None). The backprop algorithm can then leverage
+// this by not constructing zeros to pass for those indices.
+gtl::FlatMap>* FunctionsAcceptingNoneForIndicesMap() {
+ static auto* const m = new gtl::FlatMap>({
+ {"SoftmaxCrossEntropyWithLogits", {1}},
+ {"SparseSoftmaxCrossEntropyWithLogits", {1}},
+ {"FusedBatchNorm", {1, 2, 3, 4}},
+ });
+ return m;
+}
+
} // namespace
// If over kMinAggregateCount gradients are accumulated and the total
@@ -448,16 +464,16 @@ Status InitialGradients(const VSpace& vspace,
constexpr int kMinAggregateCount = 4;
constexpr int kMinAggregateBytes = 128 * 1024 * 1024;
-template
-Status GradientTape::ComputeGradient(
- const VSpace& vspace,
+template
+Status GradientTape::ComputeGradient(
+ const VSpace& vspace,
gtl::ArraySlice target_tensor_ids,
gtl::ArraySlice source_tensor_ids,
gtl::ArraySlice output_gradients,
std::vector* result) {
gtl::FlatSet sources_set(source_tensor_ids.begin(),
source_tensor_ids.end());
- BackpropInitialState state = PrepareBackprop(
+ BackpropInitialState state = PrepareBackprop(
target_tensor_ids, tensor_tape_, &op_tape_, sources_set, persistent_);
std::vector op_stack =
InitialStack(state.op_tape, state.op_missing_tensor);
@@ -485,10 +501,6 @@ Status GradientTape::ComputeGradient(
VLOG(1) << " " << t;
}
}
- gtl::FlatMap> functions_accept_none_for_indices({
- {"SoftmaxCrossEntropyWithLogits", {1}},
- {"FusedBatchNorm", {1, 2, 3, 4}},
- });
while (!op_stack.empty()) {
const int64 op = op_stack.back();
VLOG(1) << "Popped " << op;
@@ -505,18 +517,16 @@ Status GradientTape::ComputeGradient(
out_gradients.reserve(trace.output_tensor_info.size());
bool any_gradient_nonzero = false;
for (int i = 0; i < trace.output_tensor_info.size(); ++i) {
- const int64 id = trace.output_tensor_info[i].id;
+ const int64 id = trace.output_tensor_info[i].GetID();
auto grad_it = gradients.find(id);
if (grad_it == gradients.end()) {
auto func_name_it =
- functions_accept_none_for_indices.find(trace.op_type);
- if (func_name_it != functions_accept_none_for_indices.end() &&
+ FunctionsAcceptingNoneForIndicesMap()->find(trace.op_type);
+ if (func_name_it != FunctionsAcceptingNoneForIndicesMap()->end() &&
func_name_it->second.find(i) != func_name_it->second.end()) {
out_gradients.push_back(nullptr);
} else {
- out_gradients.push_back(
- vspace.Zeros(trace.output_tensor_info[i].shape,
- trace.output_tensor_info[i].dtype));
+ out_gradients.push_back(vspace.Zeros(trace.output_tensor_info[i]));
}
} else {
any_gradient_nonzero = true;
diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc
index 8486b585c8587e18e8eea18a893fac0a40ff4a27..247236b760dd8c07bbb08426100b6a4d34296d2e 100644
--- a/tensorflow/c/python_api.cc
+++ b/tensorflow/c/python_api.cc
@@ -110,7 +110,7 @@ void ExtendSession(TF_Session* session, TF_Status* status) {
session->extend_before_run = false;
}
-std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) {
+std::string GetHandleShapeAndType(TF_Graph* graph, TF_Output output) {
Node* node = &output.oper->node;
CppShapeInferenceResult::HandleData handle_data;
handle_data.set_is_set(true);
@@ -135,9 +135,8 @@ std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) {
return result;
}
-void SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output,
- const void* proto, size_t proto_len,
- TF_Status* status) {
+void SetHandleShapeAndType(TF_Graph* graph, TF_Output output, const void* proto,
+ size_t proto_len, TF_Status* status) {
tensorflow::CppShapeInferenceResult::HandleData handle_data;
if (!handle_data.ParseFromArray(proto, proto_len)) {
status->status = tensorflow::errors::InvalidArgument(
diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h
index 4bcb5bde62c8a4df4e68c1ce0daaf459434ceb5d..5cce84020bc68d912d259f51512341eb5f464a2c 100644
--- a/tensorflow/c/python_api.h
+++ b/tensorflow/c/python_api.h
@@ -54,16 +54,17 @@ void SetRequireShapeInferenceFns(TF_Graph* graph, bool require);
void ExtendSession(TF_Session* session, TF_Status* status);
// Returns the serialized CppShapeInferenceResult::HandleData proto for
-// `output` if its a resource tensor, or otherwise returns the empty string.
-std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output);
+// `output` if its a resource or variant tensor, or otherwise returns the empty
+// string.
+std::string GetHandleShapeAndType(TF_Graph* graph, TF_Output output);
// Sets `output` based on `proto`, which should be a serialized
-// CppShapeInferenceResult::HandleData proto.
+// CppShapeInferenceResult::HandleData proto. `output` should be a resource
+// or variant tensor.
// NOTE(skyewm): `proto` is passed a void*/size_t pair instead of a std::string
// because I couldn't get SWIG to work otherwise.
-void SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output,
- const void* proto, size_t proto_len,
- TF_Status* status);
+void SetHandleShapeAndType(TF_Graph* graph, TF_Output output, const void* proto,
+ size_t proto_len, TF_Status* status);
} // namespace tensorflow
#endif // TENSORFLOW_C_PYTHON_API_H_
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index f56521dac0374849081fe94f16feb08e55647b56..c18b07603ae3841d3581741ab5a43f2e8b628356 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -10,11 +10,12 @@ licenses(["notice"]) # Apache 2.0
load(
"//tensorflow:tensorflow.bzl",
- "tf_cc_test",
+ "cc_library_with_android_deps",
"tf_cc_binary",
+ "tf_cc_test",
"tf_copts",
"tf_gen_op_wrappers_cc",
- "cc_library_with_android_deps",
+ "transitive_hdrs",
)
cc_library(
@@ -410,6 +411,7 @@ tf_cc_test(
srcs = ["gradients/nn_grad_test.cc"],
deps = [
":cc_ops",
+ ":cc_ops_internal",
":grad_op_registry",
":grad_testutil",
":gradient_checker",
@@ -452,11 +454,33 @@ tf_cc_test(
],
)
+# Generates separate libraries for array_ops and math_ops to reduce the dependency count of targets that depend on only these
tf_gen_op_wrappers_cc(
- name = "cc_ops",
+ name = "math_ops",
+ api_def_srcs = ["//tensorflow/core/api_def:base_api_def"],
+ op_lib_names = [
+ "math_ops",
+ ],
+ pkg = "//tensorflow/core",
+)
+
+tf_gen_op_wrappers_cc(
+ name = "array_ops",
api_def_srcs = ["//tensorflow/core/api_def:base_api_def"],
op_lib_names = [
"array_ops",
+ ],
+ pkg = "//tensorflow/core",
+)
+
+tf_gen_op_wrappers_cc(
+ name = "cc_ops",
+ api_def_srcs = ["//tensorflow/core/api_def:base_api_def"],
+ deps_internal = [
+ ":array_ops_internal",
+ ":math_ops_internal",
+ ],
+ op_lib_names = [
"audio_ops",
"candidate_sampling_ops",
"control_flow_ops",
@@ -467,7 +491,6 @@ tf_gen_op_wrappers_cc(
"logging_ops",
"lookup_ops",
"manip_ops",
- "math_ops",
"nn_ops",
"no_op",
"parsing_ops",
@@ -479,10 +502,21 @@ tf_gen_op_wrappers_cc(
"user_ops",
],
other_hdrs = [
+ "ops/array_ops.h",
"ops/const_op.h",
+ "ops/math_ops.h",
"ops/standard_ops.h",
],
+ other_hdrs_internal = [
+ "ops/array_ops_internal.h",
+ "ops/math_ops_internal.h",
+ ],
pkg = "//tensorflow/core",
+ deps = [
+ ":array_ops",
+ ":const_op",
+ ":math_ops",
+ ],
)
tf_cc_test(
@@ -716,3 +750,26 @@ tf_cc_test(
"//tensorflow/core:testlib",
],
)
+
+transitive_hdrs(
+ name = "headers",
+ visibility = ["//tensorflow:__subpackages__"],
+ deps = [
+ ":cc_ops",
+ ":client_session",
+ ":coordinator",
+ ":gradient_checker",
+ ":gradients",
+ ":ops",
+ ":queue_runner",
+ ":remote_fused_graph_ops",
+ ":scope",
+ "//tensorflow/cc/profiler",
+ "//tensorflow/cc/saved_model:constants",
+ "//tensorflow/cc/saved_model:loader",
+ "//tensorflow/cc/saved_model:reader",
+ "//tensorflow/cc/saved_model:signature_constants",
+ "//tensorflow/cc/saved_model:tag_constants",
+ "//tensorflow/cc/tools:freeze_saved_model",
+ ],
+)
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index c20ea95a15e3f53b9b26716ed7b624fa853017c9..39593370d1c243e84dc5b6091724d1d404c102b0 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -466,7 +466,7 @@ string AvoidCPPKeywords(StringPiece name) {
if (IsCPPKeyword(name)) {
return strings::StrCat(name, "_");
}
- return std::string(name);
+ return string(name);
}
void InferArgAttributes(const OpDef::ArgDef& arg,
@@ -853,11 +853,7 @@ void OpInfo::WriteClassDecl(WritableFile* h) const {
}
}
- strings::StrAppend(&class_decl, "\n");
-
- if (output_types.empty()) {
- strings::StrAppend(&class_decl, " Operation operation;\n");
- }
+ strings::StrAppend(&class_decl, "\n Operation operation;\n");
for (int i = 0; i < output_types.size(); ++i) {
strings::StrAppend(&class_decl, " ", output_types[i], " ", output_names[i],
";\n");
@@ -878,9 +874,11 @@ void OpInfo::GetOutput(string* out) const {
string return_on_error =
strings::StrCat("if (!", scope_str, ".ok()) return;");
+ strings::StrAppend(out, " this->operation = Operation(ret);\n");
+
// No outputs.
if (graph_op_def.output_arg_size() == 0) {
- strings::StrAppend(out, " this->operation = Operation(ret);\n return;\n");
+ strings::StrAppend(out, " return;\n");
return;
}
if (graph_op_def.output_arg_size() == 1) {
diff --git a/tensorflow/cc/framework/ops.h b/tensorflow/cc/framework/ops.h
index a085e1d6e2de5ad63d11eb8979ae64c26b91366f..0717e7dd4b358d6c212070374bcc3fd2f91ed0ab 100644
--- a/tensorflow/cc/framework/ops.h
+++ b/tensorflow/cc/framework/ops.h
@@ -150,7 +150,7 @@ class Input {
Initializer(const std::initializer_list& v, const TensorShape& shape) {
typedef typename RealType::type RealT;
Tensor t(DataTypeToEnum::v(), shape);
- if (t.NumElements() != v.size()) {
+ if (t.NumElements() != static_cast(v.size())) {
status = errors::InvalidArgument(
"Cannot construct a tensor with ", t.NumElements(),
" from an initializer list with ", v.size(), " elements");
diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc
index 8c886f31711eb014fb9e9d600c9c78cf22073f71..6abc9e268e3ac97379954a34017ddffa010db67f 100644
--- a/tensorflow/cc/framework/scope.cc
+++ b/tensorflow/cc/framework/scope.cc
@@ -62,7 +62,7 @@ Scope::Impl::Impl(const std::shared_ptr& graph,
refiner_(refiner),
scope_used_(nullptr),
colocation_constraints_(),
- disable_shape_inference_(false) {}
+ disable_shape_inference_(refiner_ == nullptr) {}
Scope Scope::NewRootScope() {
Graph* graph = new Graph(OpRegistry::Global());
@@ -94,6 +94,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ScopeName, const string& name,
exit_on_error_(other.impl()->exit_on_error_),
kernel_label_(other.impl()->kernel_label_),
device_(other.impl()->device_),
+ assigned_device_(other.impl()->assigned_device_),
colocation_constraints_(other.impl()->colocation_constraints_),
disable_shape_inference_(other.impl()->disable_shape_inference_) {}
@@ -110,6 +111,7 @@ Scope::Impl::Impl(const Scope& other, Tags::OpName, const string& name,
exit_on_error_(other.impl()->exit_on_error_),
kernel_label_(other.impl()->kernel_label_),
device_(other.impl()->device_),
+ assigned_device_(other.impl()->assigned_device_),
colocation_constraints_(other.impl()->colocation_constraints_),
disable_shape_inference_(other.impl()->disable_shape_inference_) {}
@@ -132,6 +134,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ControlDeps,
exit_on_error_(other.impl()->exit_on_error_),
kernel_label_(other.impl()->kernel_label_),
device_(other.impl()->device_),
+ assigned_device_(other.impl()->assigned_device_),
colocation_constraints_(other.impl()->colocation_constraints_),
disable_shape_inference_(other.impl()->disable_shape_inference_) {}
@@ -163,6 +166,7 @@ Scope::Impl::Impl(const Scope& other, Tags::SingleUseScope,
exit_on_error_(other.impl()->exit_on_error_),
kernel_label_(other.impl()->kernel_label_),
device_(other.impl()->device_),
+ assigned_device_(other.impl()->assigned_device_),
colocation_constraints_(other.impl()->colocation_constraints_),
disable_shape_inference_(other.impl()->disable_shape_inference_) {}
@@ -178,6 +182,7 @@ Scope::Impl::Impl(const Scope& other, Tags::ExitOnError)
exit_on_error_(true),
kernel_label_(other.impl()->kernel_label_),
device_(other.impl()->device_),
+ assigned_device_(other.impl()->assigned_device_),
colocation_constraints_(other.impl()->colocation_constraints_),
disable_shape_inference_(other.impl()->disable_shape_inference_) {}
@@ -194,6 +199,7 @@ Scope::Impl::Impl(const Scope& other, Tags::KernelLabel,
exit_on_error_(other.impl()->exit_on_error_),
kernel_label_(kernel_label),
device_(other.impl()->device_),
+ assigned_device_(other.impl()->assigned_device_),
colocation_constraints_(other.impl()->colocation_constraints_),
disable_shape_inference_(other.impl()->disable_shape_inference_) {}
@@ -210,12 +216,30 @@ Scope::Impl::Impl(const Scope& other, Tags::Colocate,
exit_on_error_(other.impl()->exit_on_error_),
kernel_label_(other.impl()->kernel_label_),
device_(other.impl()->device_),
+ assigned_device_(other.impl()->assigned_device_),
colocation_constraints_(
clear_colocations
? std::unordered_set()
: other.impl()->GetColocationConstraints(colocate_with_op)),
disable_shape_inference_(other.impl()->disable_shape_inference_) {}
+Scope::Impl::Impl(const Scope& other, Tags::AssignedDevice,
+ const string& assigned_device)
+ : graph_(other.impl()->graph_),
+ status_(other.impl()->status_),
+ name_map_(other.impl()->name_map_),
+ refiner_(other.impl()->refiner_),
+ scope_used_(other.impl()->scope_used_),
+ control_deps_(other.impl()->control_deps_),
+ name_(other.impl()->name_),
+ op_name_(other.impl()->op_name_),
+ exit_on_error_(other.impl()->exit_on_error_),
+ kernel_label_(other.impl()->kernel_label_),
+ device_(other.impl()->device_),
+ assigned_device_(assigned_device),
+ colocation_constraints_(other.impl()->colocation_constraints_),
+ disable_shape_inference_(other.impl()->disable_shape_inference_) {}
+
std::unordered_set Scope::Impl::GetColocationConstraints(
const Operation& colocate_with_op) const {
std::unordered_set current_constraints(colocation_constraints_);
@@ -225,7 +249,7 @@ std::unordered_set Scope::Impl::GetColocationConstraints(
for (const string& entry : node_constraints) {
StringPiece s(entry);
if (str_util::ConsumePrefix(&s, kColocationGroupPrefix)) {
- current_constraints.insert(std::string(s));
+ current_constraints.emplace(s);
}
}
} else {
@@ -299,6 +323,9 @@ void Scope::UpdateBuilder(NodeBuilder* builder) const {
if (!impl()->device_.empty()) {
builder->Device(impl()->device_);
}
+ if (!impl()->assigned_device_.empty()) {
+ builder->AssignedDevice(impl()->assigned_device_);
+ }
}
string Scope::Impl::GetUniqueName(const string& prefix,
@@ -394,6 +421,10 @@ Scope Scope::WithDevice(const string& device) const {
return Scope(new Impl(*this, Impl::Tags::Device(), device));
}
+Scope Scope::WithAssignedDevice(const string& assigned_device) const {
+ return Scope(new Impl(*this, Impl::Tags::AssignedDevice(), assigned_device));
+}
+
Scope Scope::ColocateWith(const Operation& op) const {
return Scope(new Impl(*this, Impl::Tags::Colocate(), op,
/* clear_colocations */ false));
diff --git a/tensorflow/cc/framework/scope.h b/tensorflow/cc/framework/scope.h
index 30c32bd44b0f22d6b29dd3836d431807d0216818..e307d8989b6647dfac8d2691ed2171c86b7f3a7c 100644
--- a/tensorflow/cc/framework/scope.h
+++ b/tensorflow/cc/framework/scope.h
@@ -133,6 +133,10 @@ class Scope {
/// the device field set to 'device'.
Scope WithDevice(const string& device) const;
+ /// Returns a new scope. All ops created within the returned scope will have
+ /// their assigned device set to `assigned_device`.
+ Scope WithAssignedDevice(const string& assigned_device) const;
+
/// Return a new scope. All ops created within the returned scope will be
/// co-located on the device where op is placed.
/// NOTE: This function is intended to be use internal libraries only for
diff --git a/tensorflow/cc/framework/scope_internal.h b/tensorflow/cc/framework/scope_internal.h
index 58adaef2e942a7fa6b0ce8d5534ac3e2fd380580..514e02e84146b6d95147d83182e5d9a07509cfa1 100644
--- a/tensorflow/cc/framework/scope_internal.h
+++ b/tensorflow/cc/framework/scope_internal.h
@@ -26,6 +26,8 @@ class ShapeRefiner;
// graph, status, name_map, and refiner.
// This is intended to enable the C API (which are used by other language
// bindings) to create a Scope and access C++ functionality (i.e. gradients).
+//
+// Shape inference is disabled if `refiner` is nullptr.
Scope NewInternalScope(Graph* graph, Status* status, ShapeRefiner* refiner);
class Scope::Impl {
@@ -58,6 +60,7 @@ class Scope::Impl {
enum class ExitOnError;
enum class KernelLabel;
enum class Colocate;
+ enum class AssignedDevice;
};
Impl(Graph* graph, Status* status, NameMap* name_map, ShapeRefiner* refiner,
@@ -74,6 +77,7 @@ class Scope::Impl {
Impl(const Scope& other, Tags::KernelLabel, const string& kernel_label);
Impl(const Scope& other, Tags::Colocate, const Operation& colocate_with_op,
bool clear_colocations);
+ Impl(const Scope& other, Tags::AssignedDevice, const string& assigned_device);
std::unordered_set GetColocationConstraints(
const Operation& colocate_with_op) const;
@@ -107,6 +111,7 @@ class Scope::Impl {
const bool exit_on_error_ = false;
const string kernel_label_ = "";
const string device_ = "";
+ const string assigned_device_ = "";
const std::unordered_set colocation_constraints_;
// If true, Scope::DoShapeInference() always returns Status:OK().
diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc
index 588e96cb196189780037f66266484962ba0385e4..2a32a2ed6f7862a29f4ce3d1aba5fdbc86adc670 100644
--- a/tensorflow/cc/gradients/nn_grad.cc
+++ b/tensorflow/cc/gradients/nn_grad.cc
@@ -143,6 +143,33 @@ Status Relu6GradHelper(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("Relu6", Relu6GradHelper);
+Status LeakyReluGradHelper(const Scope& scope, const Operation& op,
+ const std::vector