diff --git a/.gitignore b/.gitignore
index be75938ec401b1d72fa54773c85191aaac7d7f35..828bbe9bd3363853ae3f58f54a8d5f60cefad837 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,6 +27,7 @@ Podfile.lock
/tensorflow/contrib/lite/examples/ios/simple/data/*.txt
/tensorflow/contrib/lite/examples/ios/simple/data/*.tflite
xcuserdata/**
+/api_init_files_list.txt
# Android
.gradle
diff --git a/CODEOWNERS b/CODEOWNERS
index 007a304c3e706ce968576ec8979c08f1a3bcc552..b9f0313cc6d59d3fbdcd014e1a528126d863075a 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -45,7 +45,7 @@
# /tensorflow/contrib/session_bundle/ @nfiedel @sukritiramesh
# /tensorflow/contrib/slim/ @sguada @thenbasilmanran
# /tensorflow/contrib/stateless/ @girving
-# /tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst
+# /tensorflow/contrib/tensor_forest/ @gilberthendry @thomascolthurst @yupbank
# /tensorflow/contrib/testing/ @dandelionmane
# /tensorflow/contrib/timeseries/ @allenlavoie
# /tensorflow/contrib/tpu/ @frankchn @saeta @jhseu
diff --git a/README.md b/README.md
index c66f7e3f3f49ed90e4e75475185585a932049f37..e1a50c87e26d493ba3ac760f357905d89aa40dab 100644
--- a/README.md
+++ b/README.md
@@ -7,14 +7,14 @@
| **`Documentation`** | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** |
|-----------------|---------------------|------------------|-------------------|---------------|---------------|
-| [](https://www.tensorflow.org/api_docs/) | [](https://ci.tensorflow.org/job/tensorflow-master-cpu) | [](https://ci.tensorflow.org/job/tensorflow-master-linux-gpu) | [](https://ci.tensorflow.org/job/tensorflow-master-mac) | [](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [](https://ci.tensorflow.org/job/tensorflow-master-android) [  ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion)
+| [](https://www.tensorflow.org/api_docs/) |  |  |  | [](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [](https://ci.tensorflow.org/job/tensorflow-master-android) [  ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion)
**TensorFlow** is an open source software library for numerical computation using
data flow graphs. The graph nodes represent mathematical operations, while
the graph edges represent the multidimensional data arrays (tensors) that flow
between them. This flexible architecture enables you to deploy computation to one
or more CPUs or GPUs in a desktop, server, or mobile device without rewriting
-code. TensorFlow also includes TensorBoard, a data visualization toolkit.
+code. TensorFlow also includes [TensorBoard](https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard), a data visualization toolkit.
TensorFlow was originally developed by researchers and engineers
working on the Google Brain team within Google's Machine Intelligence Research
diff --git a/RELEASE.md b/RELEASE.md
index e8459531748628fd822d876d79625fdd65798791..84d9d52868ecd55d38d6073315749d11c2340e8c 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -1,3 +1,62 @@
+# Release 1.8.0
+
+## Major Features And Improvements
+* Can now pass `tf.contrib.distribute.MirroredStrategy()` to `tf.estimator.RunConfig()` to run an Estimator model on multiple GPUs on one machine.
+* Add `tf.contrib.data.prefetch_to_device()`, which supports prefetching to GPU memory.
+* Added Gradient Boosted Trees as pre-made Estimators: BoostedTreesClassifier, BoostedTreesRegressor.
+* Add 3rd generation pipeline config for Cloud TPUs which improves performance and usability.
+* `tf.contrib.bayesflow` is moving out to its own repo.
+* Added `tf.contrib.{proto,rpc}` to allow generic proto parsing and RPC communication[1](#rpc-issue).
+
+## Bug Fixes and Other Changes
+* `tf.data`:
+ * Add `tf.contrib.data.prefetch_to_device`, which enables prefetching dataset elements to GPU memory.
+ * Add `tf.contrib.data.AUTOTUNE`, which allows the tf.data runtime to automatically tune the prefetch buffer sizes based on your system and environment.
+ * Add `tf.contrib.data.make_csv_dataset` for building datasets of CSV files.
+* Eager Execution:
+ * With eager execution, Datasets can now be used as standard Python iterators (`for batch in dataset:`). Both `Dataset.__iter__()` and `Dataset.make_one_shot_iterator()` can now be used to create iterators when eager execution is enabled.
+ * Automatic device placement has been enabled (i.e., use a GPU if available automatically, without requiring an explicit `with tf.device("/gpu:0")`) (Fixes #14133)
+ * `tf.GradientTape` has moved out of contrib.
+* `tf.keras`:
+ * Added the fashion mnist dataset.
+ * New data preprocessing functions: `image/random_brightness`, `sequence/TimeseriesGenerator`, and `text/hashing_trick`.
+* Accelerated Linear Algebra (XLA):
+ * Select and scatter in reference util and evaluator now use lexicographical order to break ties.
+* TensorFlow Debugger (tfdbg) CLI:
+ * During tensor-filter operations, allow exclusion of nodes by regular expressions.
+ * Fix spurious background colors in some text terminals.
+* `tf.contrib`:
+ * Add meta-distribution BatchReshape which reshapes batch dimensions.
+ * `tf.contrib.layers.recompute_grad` works for explicit gradient checkpointing on TPU.
+ * Add `tf.contrib.framework.argsort`.
+ * Allow `DNNBoostedTreeCombinedEstimator` to work with core versions of feature columns and losses.
+ * Add non-linear image warping ops: `tf.contrib.image.sparse_image_warp`, `tf.contrib.image.dense_image_warp`, and `tf.contrib.image.interpolate_spline`.
+ * Fix bug in `tf.contrib.opt.MultitaskOptimizerWrapper` where types of tensors were mismatched.
+* Other:
+ * Low-level graph construction now calls the TensorFlow C API. This change should be invisible to most users, but can be disabled by setting the environment variable `TF_C_API_GRAPH_CONSTRUCTION=0` in this release. Future releases will remove the ability to disable this change. Please [file a bug](https://github.com/tensorflow/tensorflow/issues/new) if you find yourself using this escape hatch.
+ * Add description of shapes and a pointer to tutorial notebook in `tf.distributions.Distribution`.
+ * Update scatter operations:
+ * Add `tf.scatter_min` and `tf.scatter_max`
+ * Extend scatter operations to work with a scalar update parameter.
+ * Move cuDNN RNN ops to core for use in TensorFlow codebase only.
+ * Add `float64` support for `Conv2d`, `Conv2dBackpropInput`, and `Conv2dBackpropFilter`.
+ * Add `float64` support for `AvgPool`/`AvgPoolGrad`.
+ * Make graph name scope thread local so that they work correctly in multi-threaded environments.
+ * Update nsync synchronization library to avoid slow primitives on Linux.
+ * Removed need to put nsync/public on C include path when building custom ops.
+ * Add `tf.image.psnr`, `tf.image.ssim`, `tf.image.ssim_multiscale`, `tf.image.image_gradients`, `tf.image.sobel_edges`.
+ * Add links to https://js.tensorflow.org.
+ * Fix non-uniformity of orthogonal matrices.
+ * Fix bug where multi-image Estimator eval summaries were not displayed correctly.
+
+<sup><a id="rpc-issue">1</a></sup> The cancellation logic of the RPC op contains a concurrency error. A fix has been submitted to master and will be part of the next release.
+
+## Thanks to our Contributors
+
+This release contains contributions from many people at Google, as well as:
+
+4d55397500, Aghasy, Alan Du, Alan Lee, Alan Yee, Alex Wiltschko, Animesh Karnewar, Ankit Gupta, Anton Matosov, Aris L, Ben Barsdell, Brent Yi, Brett Koonce, Carl Thomé, cbockman, Chikanaga Tomoyuki, Chris Tava, CéDric Deltheil, Dahan Gong, Dalmo Cirne, Daniel Erenrich, David Norman, DavidNorman, Edd Wilder-James, Fanjin Zeng, Felix Abecassis, fo40225, George Sterpu, Giovanni Terlingen, Gor Baghdasaryan, Guillaume Klein, Hanchen Li, Ilya Polenov, Jakub Kolodziejczyk, Jason Sadler, Jayaram Bobba, Jerry Liu, jinghuangintel, Jiongyan Zhang (张炯衍), Joel Shor, Jong Wook Kim, Julian Eisenschlos, Karl Lessard, Krish Ravindranath, Loo Rong Jie, Lukas Geiger, Luke Iwanski, Mahmoud Abuzaina, ManHyuk, Marvin Richter, Maximilian Mitchell, Mohammad Ashraf Bhuiyan, msofka, Mustafa Kasap, Nathan Burnham, Nathan Luehr, Naveen Marri, ngc92, nio1814, Oleg Zabluda, Ou Changkun, Panos Ipeirotis, Paul Van Eck, Peter Lee, Piotr Czapla, qjivy, Rholais Lii, Rodrigo Formigone, Russell Klopfer, ryantimjohn, Sang Han, SebastiáN RamíRez, shengfuintel, Siby Jose Plathottam, Silver Chan, Stanislaw Antol, Taehoon Lee, Tarang Chugh, Ted Chang, Thomas Bastiani, Xian Xu, Xiaoming (Jason) Cui, Yan Facai (颜发才), yaox12, Yashal Shakti Kanungo, Yong Tang, Yuan (Terry) Tang, Yuxin Wu, Ziyue(Louis) Lu
+
# Release 1.7.0
## Major Features And Improvements
@@ -177,7 +236,7 @@ Yoni Tsafir, yordun, Yuan (Terry) Tang, Yuxin Wu, zhengdi, Zhengsheng Wei, 田
* Add `complex64` support to XLA compiler.
* `bfloat` support is now added to XLA infrastructure.
* Make `ClusterSpec` propagation work with XLA devices.
- * Use a determinisitic executor to generate XLA graph.
+ * Use a deterministic executor to generate XLA graph.
* `tf.contrib`:
* `tf.contrib.distributions`:
* Add `tf.contrib.distributions.Autoregressive`.
diff --git a/SECURITY.md b/SECURITY.md
index a5ce3a62ee202f6e7d83f0fedc2777d9c88ba9b5..01886b613e5d93793953124331b57f075fe7a373 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -173,7 +173,7 @@ the progress being made towards a fix and announcement.
In addition, please include the following information along with your report:
* Your name and affiliation (if any).
-* A description the technical details of the vulnerabilities. It is very
+* A description of the technical details of the vulnerabilities. It is very
important to let us know how we can reproduce your findings.
* An explanation who can exploit this vulnerability, and what they gain when
doing so -- write an attack scenario. This will help us evaluate your report
diff --git a/WORKSPACE b/WORKSPACE
index 11c5cdb2070e79b16540a39f13cab28608962340..4ddfb9a3832ea1ea639ace887e1d601bdd857086 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -2,11 +2,11 @@ workspace(name = "org_tensorflow")
http_archive(
name = "io_bazel_rules_closure",
- sha256 = "6691c58a2cd30a86776dd9bb34898b041e37136f2dc7e24cadaeaf599c95c657",
- strip_prefix = "rules_closure-08039ba8ca59f64248bb3b6ae016460fe9c9914f",
+ sha256 = "a38539c5b5c358548e75b44141b4ab637bba7c4dc02b46b1f62a96d6433f56ae",
+ strip_prefix = "rules_closure-dbb96841cc0a5fb2664c37822803b06dab20c7d1",
urls = [
- "https://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/08039ba8ca59f64248bb3b6ae016460fe9c9914f.tar.gz",
- "https://github.com/bazelbuild/rules_closure/archive/08039ba8ca59f64248bb3b6ae016460fe9c9914f.tar.gz", # 2018-01-16
+ "https://mirror.bazel.build/github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz",
+ "https://github.com/bazelbuild/rules_closure/archive/dbb96841cc0a5fb2664c37822803b06dab20c7d1.tar.gz", # 2018-04-13
],
)
diff --git a/configure.py b/configure.py
index 6744082d5d55c3a039b7a4efa7a539e77185cabd..6d9aba61bbc73ba1b80321d6859877c371dc5427 100644
--- a/configure.py
+++ b/configure.py
@@ -35,6 +35,7 @@ except ImportError:
_DEFAULT_CUDA_VERSION = '9.0'
_DEFAULT_CUDNN_VERSION = '7'
+_DEFAULT_NCCL_VERSION = '1.3'
_DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2'
_DEFAULT_CUDA_PATH = '/usr/local/cuda'
_DEFAULT_CUDA_PATH_LINUX = '/opt/cuda'
@@ -225,8 +226,6 @@ def setup_python(environ_cp):
# Set-up env variables used by python_configure.bzl
write_action_env_to_bazelrc('PYTHON_BIN_PATH', python_bin_path)
write_action_env_to_bazelrc('PYTHON_LIB_PATH', python_lib_path)
- write_to_bazelrc('build --force_python=py%s' % python_major_version)
- write_to_bazelrc('build --host_force_python=py%s' % python_major_version)
write_to_bazelrc('build --python_path=\"%s"' % python_bin_path)
environ_cp['PYTHON_BIN_PATH'] = python_bin_path
@@ -484,6 +483,8 @@ def set_cc_opt_flags(environ_cp):
if is_ppc64le():
# gcc on ppc64le does not support -march, use mcpu instead
default_cc_opt_flags = '-mcpu=native'
+ elif is_windows():
+ default_cc_opt_flags = '/arch:AVX'
else:
default_cc_opt_flags = '-march=native'
question = ('Please specify optimization flags to use during compilation when'
@@ -494,7 +495,7 @@ def set_cc_opt_flags(environ_cp):
for opt in cc_opt_flags.split():
write_to_bazelrc('build:opt --copt=%s' % opt)
# It should be safe on the same build host.
- if not is_ppc64le():
+ if not is_ppc64le() and not is_windows():
write_to_bazelrc('build:opt --host_copt=-march=native')
write_to_bazelrc('build:opt --define with_default_optimizations=true')
# TODO(mikecase): Remove these default defines once we are able to get
@@ -844,8 +845,8 @@ def reformat_version_sequence(version_str, sequence_count):
def set_tf_cuda_version(environ_cp):
"""Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION."""
ask_cuda_version = (
- 'Please specify the CUDA SDK version you want to use, '
- 'e.g. 7.0. [Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION
+ 'Please specify the CUDA SDK version you want to use. '
+ '[Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION
for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
# Configure the Cuda SDK version to use.
@@ -1102,6 +1103,81 @@ def set_tf_tensorrt_install_path(environ_cp):
write_action_env_to_bazelrc('TF_TENSORRT_VERSION', tf_tensorrt_version)
+def set_tf_nccl_install_path(environ_cp):
+ """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION.
+
+ Asks for the NCCL version. When the reformatted version is '1' no install
+ path is requested; otherwise the install directory is requested as well and
+ is accepted only if both the NCCL library and include/nccl.h exist under it.
+ Accepted values are stored in environ_cp and passed to
+ write_action_env_to_bazelrc.
+
+ Args:
+ environ_cp: copy of the os.environ.
+
+ Raises:
+ ValueError: if this method was called under non-Linux platform.
+ UserInputError: if user has provided invalid input multiple times.
+ """
+ if not is_linux():
+ raise ValueError('Currently NCCL is only supported on Linux platforms.')
+
+ ask_nccl_version = (
+ 'Please specify the NCCL version you want to use. '
+ '[Leave empty to default to NCCL %s]: ') % _DEFAULT_NCCL_VERSION
+
+ # Give the user a bounded number of attempts; every successful path leaves
+ # the loop through `break`.
+ for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
+ tf_nccl_version = get_from_env_or_user_or_default(
+ environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, _DEFAULT_NCCL_VERSION)
+ # Presumably trims the version to its first component (e.g. '1.3' -> '1');
+ # the comparison with '1' below relies on that -- TODO confirm against
+ # reformat_version_sequence.
+ tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1)
+
+ if tf_nccl_version == '1':
+ break # No need to get install path, NCCL 1 is a GitHub repo.
+
+ # TODO(csigg): Look with ldconfig first if we can find the library in paths
+ # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding
+ # include directory. This is where the NCCL .deb packages install them.
+ # Then ask the user if we should use that. Instead of a single
+ # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to
+ # nccl_configure.bzl
+ # The CUDA toolkit directory is offered as the default NCCL location.
+ default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH')
+ ask_nccl_path = (r'Please specify the location where NCCL %s library is '
+ 'installed. Refer to README.md for more details. [Default '
+ 'is %s]:') % (tf_nccl_version, default_nccl_path)
+ nccl_install_path = get_from_env_or_user_or_default(
+ environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path)
+
+ # Result returned from "read" will be used unexpanded. That make "~"
+ # unusable. Going through one more level of expansion to handle that.
+ nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path))
+ # NOTE(review): the Windows/Cygwin/macOS branches below look unreachable
+ # because non-Linux platforms already raised ValueError at the top of this
+ # function -- confirm before relying on them.
+ if is_windows() or is_cygwin():
+ nccl_install_path = cygpath(nccl_install_path)
+
+ if is_windows():
+ nccl_lib_path = 'lib/x64/nccl.lib'
+ elif is_linux():
+ nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version
+ elif is_macos():
+ nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version
+
+ # The install path is valid only when both the library and the header exist.
+ nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path)
+ nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h')
+ if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path):
+ # Set NCCL_INSTALL_PATH
+ environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path
+ write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path)
+ break
+
+ # Reset and Retry
+ print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the '
+ 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path,
+ nccl_hdr_path))
+
+ # NOTE(review): only TF_NCCL_VERSION is cleared here. If NCCL_INSTALL_PATH
+ # was already present in environ_cp, the next attempt may re-read the same
+ # rejected path -- verify against get_from_env_or_user_or_default.
+ environ_cp['TF_NCCL_VERSION'] = ''
+ # for/else: reached only when every attempt failed (no `break` was taken).
+ else:
+ raise UserInputError('Invalid TF_NCCL setting was provided %d '
+ 'times in a row. Assuming to be a scripting mistake.' %
+ _DEFAULT_PROMPT_ASK_ATTEMPTS)
+
+ # Set TF_NCCL_VERSION
+ environ_cp['TF_NCCL_VERSION'] = tf_nccl_version
+ write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version)
+
+
def get_native_cuda_compute_capabilities(environ_cp):
"""Get native cuda compute capabilities.
@@ -1150,6 +1226,9 @@ def set_tf_cuda_compute_capabilities(environ_cp):
ask_cuda_compute_capabilities, default_cuda_compute_capabilities)
# Check whether all capabilities from the input is valid
all_valid = True
+ # Remove all whitespace characters before splitting the string
+ # that users may insert by accident, as this will result in error
+ tf_cuda_compute_capabilities = ''.join(tf_cuda_compute_capabilities.split())
for compute_capability in tf_cuda_compute_capabilities.split(','):
m = re.match('[0-9]+.[0-9]+', compute_capability)
if not m:
@@ -1438,6 +1517,8 @@ def main():
set_tf_cudnn_version(environ_cp)
if is_linux():
set_tf_tensorrt_install_path(environ_cp)
+ set_tf_nccl_install_path(environ_cp)
+
set_tf_cuda_compute_capabilities(environ_cp)
if 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get(
'LD_LIBRARY_PATH') != '1':
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 823393ebdf1f4b658361f31963a275a683e61002..f2ad16fa04f5beb6616c58c28d0f0c460c3e3a17 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -401,25 +401,6 @@ package_group(
],
)
-py_library(
- name = "tensorflow_py",
- srcs = ["__init__.py"],
- srcs_version = "PY2AND3",
- visibility = ["//visibility:public"],
- deps = ["//tensorflow/python"],
-)
-
-py_library(
- name = "experimental_tensorflow_py",
- srcs = ["experimental_api.py"],
- srcs_version = "PY2AND3",
- visibility = ["//tensorflow/tools/api/tests:__subpackages__"],
- deps = [
- "//tensorflow/python",
- "//tensorflow/tools/api/generator:python_api",
- ],
-)
-
load(
"//third_party/mkl:build_defs.bzl",
"if_mkl",
@@ -469,11 +450,12 @@ tf_cc_shared_object(
linkstatic = 1,
visibility = ["//visibility:public"],
deps = [
+ "//tensorflow/core:core_cpu_impl",
"//tensorflow/core:framework_internal_impl",
+ "//tensorflow/core:gpu_runtime_impl",
+ "//tensorflow/core/grappler/optimizers:custom_graph_optimizer_registry_impl",
"//tensorflow/core:lib_internal_impl",
- "//tensorflow/core:core_cpu_impl",
"//tensorflow/stream_executor:stream_executor_impl",
- "//tensorflow/core:gpu_runtime_impl",
] + tf_additional_binary_deps(),
)
@@ -553,3 +535,14 @@ exports_files(
"tf_exported_symbols.lds",
],
)
+
+# Top-level `tensorflow` Python package. Its __init__.py imports
+# tensorflow.tools.api.generator.api, hence the dependency on the API
+# generator target in addition to //tensorflow/python.
+py_library(
+ name = "tensorflow_py",
+ srcs = ["__init__.py"],
+ srcs_version = "PY2AND3",
+ visibility = ["//visibility:public"],
+ deps = [
+ "//tensorflow/python",
+ "//tensorflow/tools/api/generator:python_api",
+ ],
+)
diff --git a/tensorflow/__init__.py b/tensorflow/__init__.py
index 78ad6aec19f3bbbfcb389012ac1577573b3e4901..c8683e3976c90add3f1f54d8e575c798327e9273 100644
--- a/tensorflow/__init__.py
+++ b/tensorflow/__init__.py
@@ -20,14 +20,19 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+# pylint: disable=g-bad-import-order
+from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import
# pylint: disable=wildcard-import
-from tensorflow.python import * # pylint: disable=redefined-builtin
+from tensorflow.tools.api.generator.api import * # pylint: disable=redefined-builtin
# pylint: enable=wildcard-import
from tensorflow.python.util.lazy_loader import LazyLoader
contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib')
del LazyLoader
+from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top
+app.flags = flags # pylint: disable=undefined-variable
+
del absolute_import
del division
del print_function
diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 2367014cd02c721ea96581919c3efc96e772d9a6..8a9301d584775cff3ae315e6fd856b00d1734248 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -122,6 +122,7 @@ tf_cuda_library(
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
+ "//tensorflow/core:lib_platform",
"//tensorflow/core:protos_all_cc",
],
)
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 18eeb2816807ec9986999cfc2c9a4c0f032683c0..b86b277ac3200b88ae03490a6c1b64d464e81950 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -2097,7 +2097,7 @@ static void GraphImportGraphDefLocked(TF_Graph* graph, const GraphDef& def,
for (int i = 0; i < size; ++i) {
TensorId id = results.missing_unused_input_map_keys[i];
- tf_results->missing_unused_key_names_data.push_back(id.first.ToString());
+ tf_results->missing_unused_key_names_data.push_back(std::string(id.first));
tf_results->missing_unused_key_names[i] =
tf_results->missing_unused_key_names_data.back().c_str();
tf_results->missing_unused_key_indexes[i] = id.second;
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index b32f574628c4d1dc5c3bb3f1265a1b12adee28bc..c8594347451dffd465d7fa926cc53818dc9e38d4 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -72,7 +72,7 @@ limitations under the License.
#ifdef SWIG
#define TF_CAPI_EXPORT
#else
-#if defined(COMPILER_MSVC)
+#if defined(_WIN32)
#ifdef TF_COMPILE_LIBRARY
#define TF_CAPI_EXPORT __declspec(dllexport)
#else
@@ -80,7 +80,7 @@ limitations under the License.
#endif // TF_COMPILE_LIBRARY
#else
#define TF_CAPI_EXPORT __attribute__((visibility("default")))
-#endif // COMPILER_MSVC
+#endif // _WIN32
#endif // SWIG
#ifdef __cplusplus
@@ -1496,7 +1496,8 @@ TF_CAPI_EXPORT extern int TF_DeviceListCount(const TF_DeviceList* list);
// If index is out of bounds, an error code will be set in the status object,
// and a null pointer will be returned.
TF_CAPI_EXPORT extern const char* TF_DeviceListName(const TF_DeviceList* list,
- int index, TF_Status*);
+ int index,
+ TF_Status* status);
// Retrieves the type of the device at the given index.
//
@@ -1506,14 +1507,15 @@ TF_CAPI_EXPORT extern const char* TF_DeviceListName(const TF_DeviceList* list,
// If index is out of bounds, an error code will be set in the status object,
// and a null pointer will be returned.
TF_CAPI_EXPORT extern const char* TF_DeviceListType(const TF_DeviceList* list,
- int index, TF_Status*);
+ int index,
+ TF_Status* status);
// Retrieve the amount of memory associated with a given device.
//
// If index is out of bounds, an error code will be set in the status object,
// and -1 will be returned.
TF_CAPI_EXPORT extern int64_t TF_DeviceListMemoryBytes(
- const TF_DeviceList* list, int index, TF_Status*);
+ const TF_DeviceList* list, int index, TF_Status* status);
// --------------------------------------------------------------------------
// Load plugins containing custom ops and kernels
diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index bea93785717e2161fcec941485ac3c3f7f3e3ed5..95b04f9058afdfaadbc24f0238860279fcd3e800 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -20,6 +20,7 @@ limitations under the License.
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/platform.h"
#include "tensorflow/core/protobuf/config.pb.h"
using tensorflow::FunctionDef;
@@ -56,57 +57,6 @@ void TF_EnableXLACompilation(TF_SessionOptions* options, unsigned char enable) {
}
}
-void TF_InitializeTPU(TF_Session* session, TF_Status* status) {
- VLOG(1) << "Initializing TPU";
- TF_Operation* config_op =
- TF_GraphOperationByName(session->graph, "ConfigureDistributedTPU");
- if (config_op == nullptr) {
- status->status = tensorflow::errors::Internal(
- "Unable to find node ConfigureDistributedTPU in the TF graph.");
- return;
- }
-
- TF_Output config_node{config_op, 0};
-
- TF_Tensor* dummy_output;
- TF_SessionRun(session, /*run_options*/ nullptr,
- // input related parameters
- /*inputs*/ nullptr, /*input_values*/ nullptr, /*ninputs*/ 0,
- // output related parameters
- /*outputs*/ &config_node, /*output_values*/ &dummy_output,
- /*noutputs*/ 1,
- /*targets*/ nullptr, /*ntargets*/ 0,
- /*run_metadata*/ nullptr, status);
- if (status->status.ok()) {
- TF_DeleteTensor(dummy_output);
- }
-}
-
-void TF_ShutdownTPU(TF_Session* session, TF_Status* status) {
- {
- tensorflow::mutex_lock c(session->graph->mu);
- VLOG(1) << "Shutting down TPU, with input graph: "
- << session->graph->graph.ToGraphDefDebug().DebugString();
- }
-
- TF_Operation* shutdown_op =
- TF_GraphOperationByName(session->graph, "ShutdownDistributedTPU");
- if (shutdown_op == nullptr) {
- status->status = tensorflow::errors::Internal(
- "Unable to find node ShutdownDistributedTPU in the TF graph.");
- return;
- }
-
- TF_SessionRun(session, /*run_options*/ nullptr,
- // input related parameters
- /*inputs*/ nullptr, /*input_values*/ nullptr, /*ninputs*/ 0,
- // output related parameters
- /*outputs*/ nullptr, /*output_values*/ nullptr,
- /*noutputs*/ 0,
- /*targets*/ &shutdown_op, /*ntargets*/ 1,
- /*run_metadata*/ nullptr, status);
-}
-
const char* TF_GraphDebugString(TF_Graph* graph, size_t* len) {
tensorflow::mutex_lock c(graph->mu);
const auto& debug_str = graph->graph.ToGraphDefDebug().DebugString();
@@ -234,12 +184,19 @@ library {
return std::move(functions[0]);
}
+#if not defined(PLATFORM_WINDOWS)
// On success, returns a set of TF_Function instances encoding a dataset
// node stack that reads a Imagenet TFRecordFile dataset from `file_path`, and
// sets `dataset_name` to the created dataset name. The returned functions must
// be deleted by calling TF_DeleteFunction.
static std::vector CreateImagenetDatasetFunctions(
const char* file_path, std::string* dataset_name, TF_Status* status) {
+#if defined(PLATFORM_WINDOWS)
+ status->status = tensorflow::errors::Unimplemented(
+ "TF_MakeFileBasedIteratorGetNextWithDatasets in the experimental C API "
+ "is not implemented for Windows");
+ return std::vector();
+#else
const char* func_def = R"PREFIX(
library {
function {
@@ -7118,8 +7075,11 @@ library {
DCHECK(found);
};
return CreateFunctionsFromTextProto(func_def, &mutate_proto_func, status);
+#endif
}
+#endif
+#if not defined(PLATFORM_WINDOWS)
// On success, returns a set of TF_Function instances encoding a dataset
// node stack that reads an MNIST file dataset from `file_path`, and
// sets `dataset_name` to the created dataset name. The returned functions must
@@ -7127,6 +7087,12 @@ library {
static std::vector CreateMNISTDatasetFunctions(
const char* file_path, int batch_size, std::string* dataset_name,
TF_Status* status) {
+#if defined(PLATFORM_WINDOWS)
+ status->status = tensorflow::errors::Unimplemented(
+ "TF_MakeFileBasedIteratorGetNextWithDatasets in the experimental C API "
+ "is not implemented for Windows");
+ return nullptr;
+#else
const char* func_def = R"PREFIX(
library {
function {
@@ -8256,7 +8222,9 @@ library {
DCHECK(found_batch_size);
};
return CreateFunctionsFromTextProto(func_def, &mutate_proto_func, status);
+#endif
}
+#endif
// Adds the input functions to `graph`. On success, returns the created
// IteratorGetNext node.
@@ -8350,6 +8318,13 @@ TF_Operation* TF_MakeFakeIteratorGetNextWithDatasets(TF_Graph* graph,
TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets(
TF_Graph* graph, const char* file_path, int batch_size,
unsigned char is_mnist, TF_Status* status) {
+#if defined(PLATFORM_WINDOWS)
+ // TODO(ashankar): get these functions working on Windows.
+ status->status = tensorflow::errors::Unimplemented(
+ "TF_MakeFileBasedIteratorGetNextWithDatasets in the experimental C API "
+ "is not implemented for Windows");
+ return nullptr;
+#else
tensorflow::Status s;
std::string dataset_name;
@@ -8391,4 +8366,92 @@ TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets(
<< graph->graph.ToGraphDefDebug().DebugString();
return getnext_node;
+#endif
+}
+
+// Runs the graph node "fifo_queue_dequeue_<tensor_id>" in `session` and
+// returns its first output. The caller must release the returned tensor with
+// TF_DeleteTensor().
+//
+// Returns nullptr and sets `status` to Internal when the dequeue node is not
+// in the graph. If the session run fails before producing an output, `status`
+// carries the error and nullptr is returned rather than an indeterminate
+// pointer.
+TF_Tensor* TF_DequeueNamedTensor(TF_Session* session, int tensor_id,
+                                 TF_Status* status) {
+  assert(session);
+  {
+    // The graph mutex is held only while the graph is dumped for logging.
+    tensorflow::mutex_lock c(session->graph->mu);
+    VLOG(1) << "Dequeuing named tensor with id " << tensor_id
+            << ", with input graph: "
+            << session->graph->graph.ToGraphDefDebug().DebugString();
+  }
+
+  TF_Operation* dequeue_op = TF_GraphOperationByName(
+      session->graph,
+      tensorflow::strings::StrCat("fifo_queue_dequeue_", tensor_id).c_str());
+  if (dequeue_op == nullptr) {
+    status->status = tensorflow::errors::Internal(
+        "Unable to find the dequeue node in the TF graph.");
+    return nullptr;
+  }
+
+  VLOG(1) << "Running the dequeue op";
+  TF_Output output{dequeue_op, 0};
+  // Start from nullptr: if TF_SessionRun bails out before it touches the
+  // output slot, the caller must not receive an uninitialized pointer.
+  TF_Tensor* ret = nullptr;
+  TF_SessionRun(session, /*run_options*/ nullptr,
+                // input related parameters
+                /*inputs*/ nullptr, /*input_values*/ nullptr, /*ninputs*/ 0,
+                // output related parameters
+                /*outputs*/ &output, /*output_values*/ &ret,
+                /*noutputs*/ 1,
+                /*targets*/ nullptr, /*ntargets*/ 0,
+                /*run_metadata*/ nullptr, status);
+  // Tensor contents are converted and printed only when verbose logging is on
+  // and the run succeeded.
+  if (VLOG_IS_ON(1) && status->status.ok()) {
+    tensorflow::Tensor tensor;
+    if (tensorflow::TF_TensorToTensor(ret, &tensor).ok()) {
+      VLOG(1) << "Dequeued tensor content: " << tensor.DebugString();
+    }
+  }
+  return ret;
+}
+
+// Feeds `tensor` into the placeholder node "arg_tensor_enqueue_<tensor_id>"
+// and runs the node "fifo_queue_enqueue_<tensor_id>" as a target in `session`.
+// If either node is missing from the graph, `status` is set to Internal and
+// nothing is run; otherwise `status` receives the result of TF_SessionRun.
+void TF_EnqueueNamedTensor(TF_Session* session, int tensor_id,
+ TF_Tensor* tensor, TF_Status* status) {
+ assert(session);
+ {
+ // The graph mutex is held only for the optional verbose dump below.
+ tensorflow::mutex_lock c(session->graph->mu);
+ if (VLOG_IS_ON(1)) {
+ VLOG(1) << "Enqueuing named tensor with id " << tensor_id
+ << ", with input graph: "
+ << session->graph->graph.ToGraphDefDebug().DebugString();
+ tensorflow::Tensor internal_tensor;
+ if (tensorflow::TF_TensorToTensor(tensor, &internal_tensor).ok()) {
+ VLOG(1) << "Enqueu'ing tensor content: "
+ << internal_tensor.DebugString();
+ }
+ }
+ }
+
+ // The enqueue op is looked up first, so a graph missing both nodes reports
+ // the enqueue-node error.
+ TF_Operation* enqueue_op = TF_GraphOperationByName(
+ session->graph,
+ tensorflow::strings::StrCat("fifo_queue_enqueue_", tensor_id).c_str());
+ if (enqueue_op == nullptr) {
+ status->status = tensorflow::errors::Internal(
+ "Unable to find the enqueue node in the TF graph.");
+ return;
+ }
+
+ TF_Operation* placeholder_op = TF_GraphOperationByName(
+ session->graph,
+ tensorflow::strings::StrCat("arg_tensor_enqueue_", tensor_id).c_str());
+ if (placeholder_op == nullptr) {
+ status->status = tensorflow::errors::Internal(
+ "Unable to find the placeholder node as input to enqueue in the TF "
+ "graph.");
+ return;
+ }
+
+ VLOG(1) << "Running the enqueue op";
+ // `tensor` is fed to output 0 of the placeholder; the enqueue op is run as a
+ // target only, so no outputs are fetched.
+ TF_Output input{placeholder_op, 0};
+ TF_SessionRun(session, /*run_options*/ nullptr,
+ // input related parameters
+ /*inputs*/ &input, /*input_values*/ &tensor, /*ninputs*/ 1,
+ // output related parameters
+ /*outputs*/ nullptr, /*output_values*/ nullptr, /*noutputs*/ 0,
+ /*targets*/ &enqueue_op, /*ntargets*/ 1,
+ /*run_metadata*/ nullptr, status);
+ // NOTE(review): this is logged whether or not TF_SessionRun succeeded;
+ // callers should rely on `status`, not on this message.
+ VLOG(1) << "Enqueuing is done.";
}
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index ebcec8176b63f9a91c847ebe96fba3ff023fc599..20bdace40f1272ded06e710034053a7610326e7f 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -35,7 +35,7 @@ limitations under the License.
#ifdef SWIG
#define TF_CAPI_EXPORT
#else
-#if defined(COMPILER_MSVC)
+#if defined(_WIN32)
#ifdef TF_COMPILE_LIBRARY
#define TF_CAPI_EXPORT __declspec(dllexport)
#else
@@ -43,7 +43,7 @@ limitations under the License.
#endif // TF_COMPILE_LIBRARY
#else
#define TF_CAPI_EXPORT __attribute__((visibility("default")))
-#endif // COMPILER_MSVC
+#endif // _WIN32
#endif // SWIG
#ifdef __cplusplus
@@ -60,27 +60,6 @@ extern "C" {
TF_CAPI_EXPORT extern void TF_EnableXLACompilation(TF_SessionOptions* options,
unsigned char enable);
-// Initializes TPU system. Must be called exactly once before TF_SessionRun() is
-// called on a TPU graph.
-//
-// The session graph must contain a node named ConfigureDistributedTPU.
-// TODO(b/74774824): Improve the API on initializing TPU system.
-TF_CAPI_EXPORT extern void TF_InitializeTPU(TF_Session* session,
- TF_Status* status);
-
-// Shuts down TPU system. For any `session` where TF_InitializeTPU() has
-// been successfully called, this call must be made exactly once before the
-// session is closed.
-// The session graph must contain a node named ShutdownDistributedTPU.
-TF_CAPI_EXPORT extern void TF_ShutdownTPU(TF_Session* session,
- TF_Status* status);
-
-// Returns the graph content in a human-readable format, with length set in
-// `len`. The format is subject to change in the future.
-// The returned string is heap-allocated, and caller should call free() on it.
-TF_CAPI_EXPORT extern const char* TF_GraphDebugString(TF_Graph* graph,
- size_t* len);
-
// Returns the graph content in a human-readable format, with length set in
// `len`. The format is subject to change in the future.
// The returned string is heap-allocated, and caller should call free() on it.
@@ -107,6 +86,35 @@ TF_CAPI_EXPORT extern TF_Operation* TF_MakeFileBasedIteratorGetNextWithDatasets(
TF_Graph* graph, const char* file_path, int batch_size,
unsigned char is_mnist, TF_Status* status);
+// On success, dequeues a tensor from a TF-managed FifoQueue given by
+// `tensor_id`, associated with `session`. There must be a graph node named
+// "fifo_queue_dequeue_<tensor_id>", to be executed by this API call.
+//
+// Caller must call TF_DeleteTensor() on the returned tensor. If the queue is
+// empty, this call blocks.
+//
+// Tensors are enqueued via the corresponding TF enqueue op.
+// TODO(hongm): Add support for `timeout_ms`.
+TF_CAPI_EXPORT extern TF_Tensor* TF_DequeueNamedTensor(TF_Session* session,
+ int tensor_id,
+ TF_Status* status);
+
+// On success, enqueues `tensor` into a TF-managed FifoQueue given by
+// `tensor_id`, associated with `session`. There must be a graph node named
+// "fifo_queue_enqueue_<tensor_id>", to be executed by this API call. It reads
+// from a placeholder node "arg_tensor_enqueue_<tensor_id>".
+//
+// `tensor` is still owned by the caller. This call will be blocked if the queue
+// has reached its capacity, and will be unblocked when the queued tensors again
+// drop below the capacity due to dequeuing.
+//
+// Tensors are dequeued via the corresponding TF dequeue op.
+// TODO(hongm): Add support for `timeout_ms`.
+TF_CAPI_EXPORT extern void TF_EnqueueNamedTensor(TF_Session* session,
+ int tensor_id,
+ TF_Tensor* tensor,
+ TF_Status* status);
+
#ifdef __cplusplus
} /* end extern "C" */
#endif
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index 028f146be31790b211e546978302e81afe26b231..577f10c5e69ea9ecbe8ce821c6bd5167e98bef25 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -53,7 +53,7 @@ Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst);
namespace {
static void ExpectHasSubstr(StringPiece s, StringPiece expected) {
- EXPECT_TRUE(StringPiece(s).contains(expected))
+ EXPECT_TRUE(str_util::StrContains(s, expected))
<< "'" << s << "' does not contain '" << expected << "'";
}
@@ -1368,7 +1368,7 @@ TEST(CAPI, SavedModel) {
}
const tensorflow::string input_op_name =
- tensorflow::ParseTensorName(input_name).first.ToString();
+ std::string(tensorflow::ParseTensorName(input_name).first);
TF_Operation* input_op =
TF_GraphOperationByName(graph, input_op_name.c_str());
ASSERT_TRUE(input_op != nullptr);
@@ -1376,7 +1376,7 @@ TEST(CAPI, SavedModel) {
ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
const tensorflow::string output_op_name =
- tensorflow::ParseTensorName(output_name).first.ToString();
+ std::string(tensorflow::ParseTensorName(output_name).first);
TF_Operation* output_op =
TF_GraphOperationByName(graph, output_op_name.c_str());
ASSERT_TRUE(output_op != nullptr);
@@ -1700,7 +1700,7 @@ TEST_F(CApiGradientsTest, OpWithNoGradientRegistered_NoGradInputs) {
TestGradientsError(false);
}
-// REGISTER_OP for CApiTestAttributesTest test cases.
+// REGISTER_OP for CApiAttributesTest test cases.
// Registers two ops, each with a single attribute called 'v'.
// The attribute in one op will have a type 'type', the other
// will have list(type).
diff --git a/tensorflow/c/checkpoint_reader.cc b/tensorflow/c/checkpoint_reader.cc
index b1f7bdaa5420a56386e6983052df20aa976aa867..74bc25a491ac01cb725d1c004197e48727c30230 100644
--- a/tensorflow/c/checkpoint_reader.cc
+++ b/tensorflow/c/checkpoint_reader.cc
@@ -125,7 +125,7 @@ CheckpointReader::BuildV2VarMaps() {
const auto& slice_proto = entry.slices(i);
CHECK(filtered_keys
.insert(EncodeTensorNameSlice(
- v2_reader_->key().ToString() /* full var's name */,
+ std::string(v2_reader_->key()) /* full var's name */,
TensorSlice(slice_proto)))
.second);
}
@@ -138,11 +138,11 @@ CheckpointReader::BuildV2VarMaps() {
new TensorSliceReader::VarToDataTypeMap);
v2_reader_->Seek(kHeaderEntryKey);
for (v2_reader_->Next(); v2_reader_->Valid(); v2_reader_->Next()) {
- if (filtered_keys.count(v2_reader_->key().ToString()) > 0) continue;
+ if (filtered_keys.count(std::string(v2_reader_->key())) > 0) continue;
CHECK(entry.ParseFromArray(v2_reader_->value().data(),
v2_reader_->value().size()))
<< entry.InitializationErrorString();
- string key = v2_reader_->key().ToString();
+ string key = std::string(v2_reader_->key());
(*var_to_shape_map)[key] = TensorShape(entry.shape());
(*var_to_data_type_map)[key] = DataType(entry.dtype());
}
diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index a2d96357ac8a55be7fe03bf58e33ff1733967dd1..14321191625e448637aa44a7f6a17820159b97c2 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -31,7 +31,6 @@ tf_cuda_library(
"//tensorflow/core/common_runtime/eager:context",
"//tensorflow/core/common_runtime/eager:eager_executor",
"//tensorflow/core/common_runtime/eager:execute",
- "//tensorflow/core/common_runtime/eager:execute_node",
"//tensorflow/core/common_runtime/eager:kernel_and_device",
"//tensorflow/core/common_runtime/eager:tensor_handle",
"//tensorflow/core/common_runtime/eager:copy_to_device_node",
@@ -49,6 +48,7 @@ tf_cuda_library(
],
"//conditions:default": [],
}) + [
+ "//tensorflow/core/common_runtime/eager:eager_operation",
"//tensorflow/core:gpu_runtime",
],
)
@@ -71,6 +71,7 @@ tf_cuda_library(
"//tensorflow/core:lib_internal",
"//tensorflow/core/common_runtime/eager:context",
"//tensorflow/core/common_runtime/eager:eager_executor",
+ "//tensorflow/core/common_runtime/eager:eager_operation",
"//tensorflow/core/common_runtime/eager:kernel_and_device",
"//tensorflow/core/common_runtime/eager:tensor_handle",
],
diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index bb1492fca2d568e0965481d5e95a50254971f8f3..3bf071f3abaac7dfd4113964fd49cd9322913bd5 100644
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -34,7 +34,6 @@ limitations under the License.
#include "tensorflow/core/common_runtime/device_set.h"
#include "tensorflow/core/common_runtime/eager/copy_to_device_node.h"
#include "tensorflow/core/common_runtime/eager/execute.h"
-#include "tensorflow/core/common_runtime/eager/execute_node.h"
#include "tensorflow/core/common_runtime/function.h"
#include "tensorflow/core/common_runtime/rendezvous_mgr.h"
#include "tensorflow/core/framework/node_def_util.h"
@@ -116,9 +115,7 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) {
opts->async, std::move(device_mgr), r);
}
-void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) {
- delete ctx;
-}
+void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { delete ctx; }
TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) {
TF_DeviceList* list = new TF_DeviceList;
@@ -220,9 +217,6 @@ TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) {
}
return retval;
}
-} // extern "C"
-
-extern "C" {
TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name,
TF_Status* status) {
@@ -242,21 +236,18 @@ TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name,
void TFE_DeleteOp(TFE_Op* op) { delete op; }
void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status) {
- tensorflow::Device* d = nullptr;
- if (device_name != nullptr && strlen(device_name) > 0) {
- status->status = op->ctx->context.FindDeviceByName(device_name, &d);
- }
- op->device = d;
+ status->status = op->operation.SetDevice(device_name);
}
const char* TFE_OpGetDevice(TFE_Op* op, TF_Status* status) {
- tensorflow::Device* device =
- (op->device == nullptr) ? op->ctx->context.HostCPU() : op->device;
+ tensorflow::Device* device = (op->operation.Device() == nullptr)
+ ? op->operation.EagerContext()->HostCPU()
+ : op->operation.Device();
return device->name().c_str();
}
void TFE_OpSetXLACompilation(TFE_Op* op, unsigned char enable) {
- op->use_xla = enable;
+ op->operation.SetUseXla(enable);
#ifndef TENSORFLOW_EAGER_USE_XLA
LOG(WARNING) << "This call is a no-op, as the TensorFlow library is not "
"built with XLA support.";
@@ -264,22 +255,20 @@ void TFE_OpSetXLACompilation(TFE_Op* op, unsigned char enable) {
}
void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) {
- h->handle->Ref();
- op->inputs.push_back(h->handle);
- op->attrs.NumInputs(op->inputs.size());
+ op->operation.AddInput(h->handle);
}
TF_AttrType TFE_OpGetAttrType(TFE_Op* op, const char* attr_name,
unsigned char* is_list, TF_Status* status) {
TF_AttrType ret;
- if (op->is_function()) {
+ if (op->operation.is_function()) {
status->status = tensorflow::errors::Unimplemented(
"TODO(apassos): Support for attributes for TensorFlow functions is not "
"ready yet.");
return TF_ATTR_INT; // The compiler requires that we return something.
}
- status->status =
- tensorflow::AttrTypeByName(*op->attr_types, attr_name, &ret, is_list);
+ status->status = tensorflow::AttrTypeByName(*op->operation.AttrTypes(),
+ attr_name, &ret, is_list);
return ret;
}
@@ -298,23 +287,24 @@ TF_AttrType TFE_OpNameGetAttrType(TFE_Context* ctx,
}
void TFE_OpSetAttrString(TFE_Op* op, const char* attr_name, const char* value) {
- op->attrs.Set(attr_name, value);
+ op->operation.MutableAttrs()->Set(attr_name, value);
}
void TFE_OpSetAttrInt(TFE_Op* op, const char* attr_name, int64_t value) {
- op->attrs.Set(attr_name, static_cast<int64>(value));
+ op->operation.MutableAttrs()->Set(attr_name, static_cast<int64>(value));
}
void TFE_OpSetAttrFloat(TFE_Op* op, const char* attr_name, float value) {
- op->attrs.Set(attr_name, value);
+ op->operation.MutableAttrs()->Set(attr_name, value);
}
void TFE_OpSetAttrBool(TFE_Op* op, const char* attr_name, unsigned char value) {
- op->attrs.Set(attr_name, (value == 0) ? false : true);
+ op->operation.MutableAttrs()->Set(attr_name, (value == 0) ? false : true);
}
void TFE_OpSetAttrType(TFE_Op* op, const char* attr_name, TF_DataType value) {
- op->attrs.Set(attr_name, static_cast<tensorflow::DataType>(value));
+ op->operation.MutableAttrs()->Set(attr_name,
+ static_cast<tensorflow::DataType>(value));
}
void TFE_OpSetAttrShape(TFE_Op* op, const char* attr_name, const int64_t* dims,
@@ -336,23 +326,24 @@ void TFE_OpSetAttrShape(TFE_Op* op, const char* attr_name, const int64_t* dims,
proto.add_dim()->set_size(dims[d]);
}
}
- op->attrs.Set(attr_name, proto);
+ op->operation.MutableAttrs()->Set(attr_name, proto);
}
void TFE_OpSetAttrFunction(TFE_Op* op, const char* attr_name,
const TFE_Op* value) {
tensorflow::AttrValue attr_value;
tensorflow::NameAttrList* func = attr_value.mutable_func();
- func->set_name(value->name);
- value->attrs.FillAttrValueMap(func->mutable_attr());
- op->attrs.Set(attr_name, attr_value);
+ func->set_name(value->operation.Name());
+ value->operation.Attrs().FillAttrValueMap(func->mutable_attr());
+ op->operation.MutableAttrs()->Set(attr_name, attr_value);
}
#define TFE_OP_SET_ATTR_LIST(fn, type) \
void fn(TFE_Op* op, const char* attr_name, const type* values, \
int num_values) { \
- op->attrs.Set(attr_name, tensorflow::gtl::ArraySlice<const type>( \
- values, num_values)); \
+ op->operation.MutableAttrs()->Set( \
+ attr_name, \
+ tensorflow::gtl::ArraySlice<const type>(values, num_values)); \
}
TFE_OP_SET_ATTR_LIST(TFE_OpSetAttrStringList, char*)
TFE_OP_SET_ATTR_LIST(TFE_OpSetAttrFloatList, float)
@@ -360,14 +351,14 @@ TFE_OP_SET_ATTR_LIST(TFE_OpSetAttrFloatList, float)
void TFE_OpSetAttrIntList(TFE_Op* op, const char* attr_name,
const int64_t* values, int num_values) {
- op->attrs.Set(attr_name,
- tensorflow::gtl::ArraySlice<const int64>(
- reinterpret_cast<const int64*>(values), num_values));
+ op->operation.MutableAttrs()->Set(
+ attr_name, tensorflow::gtl::ArraySlice<const int64>(
+ reinterpret_cast<const int64*>(values), num_values));
}
void TFE_OpSetAttrTypeList(TFE_Op* op, const char* attr_name,
const TF_DataType* values, int num_values) {
- op->attrs.Set(
+ op->operation.MutableAttrs()->Set(
attr_name,
tensorflow::gtl::ArraySlice(
reinterpret_cast(values), num_values));
@@ -379,8 +370,8 @@ void TFE_OpSetAttrBoolList(TFE_Op* op, const char* attr_name,
for (int i = 0; i < num_values; ++i) {
b[i] = values[i];
}
- op->attrs.Set(attr_name,
- tensorflow::gtl::ArraySlice(b.get(), num_values));
+ op->operation.MutableAttrs()->Set(
+ attr_name, tensorflow::gtl::ArraySlice(b.get(), num_values));
}
void TFE_OpSetAttrShapeList(TFE_Op* op, const char* attr_name,
@@ -410,9 +401,9 @@ void TFE_OpSetAttrShapeList(TFE_Op* op, const char* attr_name,
}
}
}
- op->attrs.Set(attr_name,
- tensorflow::gtl::ArraySlice(
- proto.get(), num_values));
+ op->operation.MutableAttrs()->Set(
+ attr_name, tensorflow::gtl::ArraySlice(
+ proto.get(), num_values));
}
void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name,
@@ -420,532 +411,25 @@ void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name,
std::unique_ptr funcs(
new tensorflow::NameAttrList[num_values]);
for (int i = 0; i < num_values; i++) {
- funcs[i].set_name(value[i]->name);
- value[i]->attrs.FillAttrValueMap(funcs[i].mutable_attr());
- }
- op->attrs.Set(attr_name,
- tensorflow::gtl::ArraySlice(
- funcs.get(), num_values));
-}
-} // extern "C"
-
-namespace {
-
-// Initializes the step stats if needed.
-void MaybeInitializeStepStats(tensorflow::StepStats* step_stats,
- tensorflow::EagerContext* ctx) {
- // Lazily initialize the RunMetadata with information about all devices if
- // this is the first call.
- while (step_stats->dev_stats_size() < ctx->devices()->size()) {
- int device_idx = step_stats->dev_stats_size();
- auto* dev_stats = step_stats->add_dev_stats();
- dev_stats->set_device(ctx->devices()->at(device_idx)->name());
- }
-}
-
-int StepStatsDeviceIndex(tensorflow::StepStats* step_stats,
- tensorflow::EagerContext* ctx,
- tensorflow::Device* device) {
- // Find the current device's index.
- if (device == nullptr) {
- device = ctx->HostCPU();
- }
- for (int i = 0; i < ctx->devices()->size(); ++i) {
- if (ctx->devices()->at(i) == device ||
- ctx->devices()->at(i)->name() == device->name()) {
- return i;
- }
- }
- // TODO(apassos) do not fall back to host CPU if device is unknown.
- return 0;
-}
-
-tensorflow::Status ValidateInputTypeAndPlacement(
- tensorflow::EagerContext* ctx, tensorflow::Device* op_device, TFE_Op* op,
- const tensorflow::OpKernel* kernel, tensorflow::RunMetadata* run_metadata) {
- tensorflow::Device* host_device = ctx->HostCPU();
- const tensorflow::MemoryTypeVector& memtypes = kernel->input_memory_types();
- if (memtypes.size() != op->inputs.size()) {
- return tensorflow::errors::InvalidArgument(
- "expected ", memtypes.size(), " inputs, got ", op->inputs.size());
- }
- for (int i = 0; i < op->inputs.size(); ++i) {
- const tensorflow::Device* expected_device =
- memtypes[i] == tensorflow::HOST_MEMORY ? host_device : op_device;
- tensorflow::TensorHandle* handle = op->inputs[i];
- tensorflow::Device* handle_device = nullptr;
- TF_RETURN_IF_ERROR(handle->Device(&handle_device));
- const tensorflow::Device* actual_device =
- handle_device == nullptr ? host_device : handle_device;
- if (expected_device != actual_device) {
- switch (ctx->GetDevicePlacementPolicy()) {
- case tensorflow::DEVICE_PLACEMENT_SILENT_FOR_INT32:
- // TODO(xpan): See if we could bubble python related error up
- // to python level.
- if (handle->dtype == tensorflow::DT_INT32) {
- // Note: enabling silent copies of int32 tensors to match behavior
- // of graph mode.
- break;
- }
- TF_FALLTHROUGH_INTENDED;
- case tensorflow::DEVICE_PLACEMENT_EXPLICIT:
- return tensorflow::errors::InvalidArgument(
- "Tensors on conflicting devices:"
- " cannot compute ",
- op->name, " as input #", i, " was expected to be on ",
- expected_device->name(), " but is actually on ",
- actual_device->name(), " (operation running on ",
- op_device->name(), ")",
- " Tensors can be copied explicitly using .gpu() or .cpu(),"
- " or transparently copied by using tfe.enable_eager_execution("
- "tfe.DEVICE_PLACEMENT_SILENT). Copying tensors between devices"
- " may slow down your model");
- case tensorflow::DEVICE_PLACEMENT_WARN:
- LOG(WARNING) << "before computing " << op->name << " input #" << i
- << " was expected to be on " << expected_device->name()
- << " but is actually on " << actual_device->name()
- << " (operation running on " << op_device->name()
- << "). This triggers a copy which can be a performance "
- "bottleneck.";
- break;
- case tensorflow::DEVICE_PLACEMENT_SILENT: // Do nothing.
- break;
- }
- // We are only here if the policy is warn or silent copies, so we should
- // trigger a copy.
- auto pre_time = tensorflow::Env::Default()->NowMicros();
- tensorflow::TensorHandle* copied_tensor = nullptr;
- tensorflow::Status status = tensorflow::EagerCopyToDevice(
- handle, ctx, expected_device->name().c_str(), &copied_tensor);
- if (run_metadata != nullptr) {
- auto* step_stats = run_metadata->mutable_step_stats();
- MaybeInitializeStepStats(step_stats, ctx);
- // Record the sending on the source device for now.
- int device_idx = StepStatsDeviceIndex(step_stats, ctx, handle_device);
- auto* dev_stats = step_stats->mutable_dev_stats(device_idx);
- auto* node_stats = dev_stats->add_node_stats();
- node_stats->set_node_name("_Send");
- node_stats->set_all_start_micros(pre_time);
- node_stats->set_op_end_rel_micros(
- tensorflow::Env::Default()->NowMicros() - pre_time);
- }
- if (!status.ok()) {
- if (copied_tensor != nullptr) copied_tensor->Unref();
- return tensorflow::errors::Internal(
- "Failed copying input tensor from ", actual_device->name(), " to ",
- expected_device->name(), " in order to run ", op->name, ": ",
- status.error_message());
- }
- handle->Unref();
- handle = copied_tensor;
- op->inputs[i] = copied_tensor;
- }
- if (handle->dtype != kernel->input_type(i)) {
- return tensorflow::errors::InvalidArgument(
- "cannot compute ", op->name, " as input #", i,
- " was expected to be a ",
- tensorflow::DataTypeString(kernel->input_type(i)),
- " tensor but is a ", tensorflow::DataTypeString(handle->dtype),
- " tensor");
- }
- }
- return tensorflow::Status::OK();
-}
-
-tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef,
- TFE_Context* ctx, TF_Status* status) {
- tensorflow::DeviceSet ds;
- for (tensorflow::Device* d : *ctx->context.devices()) {
- ds.AddDevice(d);
- }
- tensorflow::DeviceTypeVector final_devices;
- status->status = tensorflow::SupportedDeviceTypesForNode(
- ds.PrioritizedDeviceTypeList(), ndef, &final_devices);
- if (!status->status.ok()) {
- return nullptr;
- }
- if (final_devices.empty()) {
- status->status = tensorflow::errors::Internal(
- "Could not find valid device for node ", ndef.DebugString());
- return nullptr;
- }
- for (tensorflow::Device* d : *ctx->context.devices()) {
- if (d->device_type() == final_devices[0].type_string()) {
- return d;
- }
- }
- status->status = tensorflow::errors::Unknown(
- "Could not find a device for node ", ndef.DebugString());
- return nullptr;
-}
-
-
-#ifdef TENSORFLOW_EAGER_USE_XLA
-// Synthesizes and returns a wrapper function over `op`, which must be a
-// primitive op (e.g. matmul).
-//
-// The wrapper function conforms to the function signature expected by
-// _XlaLaunchOp, with input params ordered by . For example, if the op has input params , they will be reordered to as the input params to the synthesized function.
-//
-// It populates `const_input_types`, `arg_input_types` and
-// `op_input_to_func_input` based on the reordering results, that the caller can
-// use them to build an _XlaLaunchOp. On error, it returns NULL, and sets
-// `status` accordingly.
-const tensorflow::FunctionDef* OpToFunction(
- TFE_Op* op, std::vector* const_input_types,
- std::vector* arg_input_types,
- tensorflow::gtl::FlatMap* op_input_to_func_input,
- TF_Status* status) {
- DCHECK(!op->is_function());
-
- tensorflow::FunctionDef fdef;
-
- // Get the OpDef of the op we are trying to encapsulate.
- TFE_Context* ctx = op->ctx;
- const tensorflow::OpRegistrationData* op_data;
- {
- status->status = ctx->context.FindFunctionOpData(op->name, &op_data);
- if (!status->status.ok()) {
- return nullptr;
- }
- }
- const tensorflow::OpDef& op_def = op_data->op_def;
-
- tensorflow::OpDef* signature = fdef.mutable_signature();
-
- // Handle constant inputs.
- const std::unordered_set const_inputs(
- *tensorflow::XlaOpRegistry::CompileTimeConstantInputs(op->name));
-
- // First add place holders for the input args, so that we can refer to them by
- // position in the next loop. Also tally up the resource inputs.
- int num_resource_inputs = 0;
- for (int i = 0; i < op_def.input_arg_size(); ++i) {
- if (op_def.input_arg(i).type() == tensorflow::DT_RESOURCE) {
- ++num_resource_inputs;
- }
- signature->add_input_arg();
- }
-
- // Now we map the input params from `op_def` to `signature`, where the param
- // ordering for `signature` is: .
- int const_index = 0;
- int arg_index = const_inputs.size();
- int resource_index = op_def.input_arg_size() - num_resource_inputs;
- for (int i = 0; i < op_def.input_arg_size(); ++i) {
- const tensorflow::OpDef::ArgDef& op_input_arg = op_def.input_arg(i);
- tensorflow::OpDef::ArgDef* func_input_arg = nullptr;
- if (const_inputs.find(op_input_arg.name()) != const_inputs.end()) {
- VLOG(1) << "For const input, mapping op input " << i << " to func input "
- << const_index;
- (*op_input_to_func_input)[i] = const_index;
- func_input_arg = signature->mutable_input_arg(const_index++);
- const_input_types->push_back(
- static_cast(op->inputs[i]->dtype));
- } else if (op_input_arg.type() == tensorflow::DT_RESOURCE) {
- VLOG(1) << "For resource input, mapping op input " << i
- << " to func input " << resource_index;
- (*op_input_to_func_input)[i] = resource_index;
- func_input_arg = signature->mutable_input_arg(resource_index++);
- } else {
- VLOG(1) << "For arg input, mapping op input " << i << " to func input "
- << arg_index;
- (*op_input_to_func_input)[i] = arg_index;
- func_input_arg = signature->mutable_input_arg(arg_index++);
- arg_input_types->push_back(
- static_cast(op->inputs[i]->dtype));
- }
-
- func_input_arg->set_name(op_input_arg.name());
- func_input_arg->set_type(op->inputs[i]->dtype);
- }
- VLOG(1) << "Added OpDef Inputs: " << fdef.DebugString();
-
- // Resources args are at the end of the function input params, and we should
- // have iterated over all of them.
- DCHECK_EQ(signature->input_arg_size(), resource_index);
-
- // Make the synthesized function's name unique.
- signature->set_name(tensorflow::strings::StrCat(
- op_def.name(), func_id_generator.fetch_add(1)));
-
- // Add the node def and set its input names to match op_def's names.
- const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef();
- DCHECK_EQ(signature->input_arg_size(), ndef.input_size());
- *fdef.add_node_def() = ndef;
- for (int i = 0; i < op_def.input_arg_size(); ++i) {
- fdef.mutable_node_def(0)->set_input(i, op_def.input_arg(i).name());
- }
- VLOG(1) << "Added NodeDef: " << fdef.DebugString();
-
- // Fix the output names and set output types.
- for (int i = 0; i < op_def.output_arg_size(); ++i) {
- tensorflow::OpDef::ArgDef* arg = signature->add_output_arg();
- const tensorflow::OpDef::ArgDef& op_def_arg = op_def.output_arg(i);
- const string& out_tensor_name = tensorflow::strings::StrCat(
- ndef.name(), ":", op_def_arg.name(), ":", 0);
- arg->set_name(op_def_arg.name());
- (*fdef.mutable_ret())[op_def_arg.name()] = out_tensor_name;
- const string& type_attr = op_def_arg.type_attr();
- if (!type_attr.empty()) {
- auto i = ndef.attr().find(type_attr);
- if (i == ndef.attr().end()) {
- status->status = tensorflow::errors::InvalidArgument(
- tensorflow::strings::StrCat("Could not find attr ", type_attr,
- " in NodeDef ", ndef.DebugString()));
- return nullptr;
- }
- arg->set_type(i->second.type());
- }
- }
- VLOG(1) << "Fixed Output names and all types: " << fdef.DebugString();
-
- status->status = ctx->context.AddFunctionDef(fdef);
- if (!status->status.ok()) return nullptr;
- const auto ret = ctx->context.FindFunctionDef(signature->name());
- DCHECK(ret != nullptr);
- return ret;
-}
-
-// Builds an _XLALaunchOp as a wrapper over 'op', so that 'op' can be executed
-// via XLA.
-std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) {
- VLOG(1) << "Creating _XlaLaunchOp for TFE_Op " << op->name;
- auto launch_op =
- std::unique_ptr(TFE_NewOp(op->ctx, "_XlaLaunch", status));
- if (TF_GetCode(status) != TF_OK) return nullptr;
- if (op->device) {
- TFE_OpSetDevice(launch_op.get(), op->device->name().c_str(), status);
- if (TF_GetCode(status) != TF_OK) return nullptr;
- }
-
- const tensorflow::FunctionDef* fdef;
- {
- fdef = op->ctx->context.FindFunctionDef(op->name);
- }
- std::vector const_input_types;
- std::vector arg_input_types;
- tensorflow::gtl::FlatMap op_input_to_func_input;
- if (fdef == nullptr) {
- // See if this is a primitive op, and if so create a function for it, so
- // that _XlaLaunchOp can access it.
- fdef = OpToFunction(op, &const_input_types, &arg_input_types,
- &op_input_to_func_input, status);
- if (!status->status.ok()) return nullptr;
- } else {
- // TODO(hongm): XlaOpRegistry::CompileTimeConstantInputs() does not work for
- // functions, so we need to find another way to handle constant inputs.
- for (int i = const_input_types.size();
- i < fdef->signature().input_arg_size(); ++i) {
- VLOG(1) << "Adding Targs from input arg " << i;
- const tensorflow::OpDef::ArgDef& arg = fdef->signature().input_arg(i);
- arg_input_types.push_back(static_cast(arg.type()));
- }
- }
- DCHECK(fdef != nullptr);
-
- // Copy inputs and their devices.
- // Since input param reordering may have occurred between `op` and `launch_op`
- // via `op_input_to_func_input`, adjust the actual inputs accordingly.
- launch_op->inputs = op->inputs;
- for (tensorflow::TensorHandle* h : launch_op->inputs) {
- h->Ref();
- }
- if (!op_input_to_func_input.empty()) {
- DCHECK_EQ(op->inputs.size(), op_input_to_func_input.size());
- for (int i = 0; i < op_input_to_func_input.size(); ++i) {
- VLOG(1) << "mapping op input " << i << " to func input "
- << op_input_to_func_input[i];
-
- launch_op->inputs[op_input_to_func_input[i]] = op->inputs[i];
- }
- }
- launch_op->attrs.NumInputs(op->inputs.size());
-
- TFE_OpSetAttrTypeList(launch_op.get(), "Tconstants", const_input_types.data(),
- const_input_types.size());
-
- // Set Targs and Nresources attrs.
- TFE_OpSetAttrTypeList(launch_op.get(), "Targs", arg_input_types.data(),
- arg_input_types.size());
- const int num_resource_inputs = fdef->signature().input_arg_size() -
- const_input_types.size() -
- arg_input_types.size();
- TFE_OpSetAttrInt(launch_op.get(), "Nresources", num_resource_inputs);
-
- // Set Tresults attr.
- std::vector tresults;
- for (const tensorflow::OpDef::ArgDef& arg : fdef->signature().output_arg()) {
- tresults.push_back(static_cast(arg.type()));
+ funcs[i].set_name(value[i]->operation.Name());
+ value[i]->operation.Attrs().FillAttrValueMap(funcs[i].mutable_attr());
}
- TFE_OpSetAttrTypeList(launch_op.get(), "Tresults", tresults.data(),
- tresults.size());
-
- // Set function attr.
- tensorflow::AttrValue attr_value;
- tensorflow::NameAttrList* func = attr_value.mutable_func();
- func->set_name(fdef->signature().name());
- launch_op->attrs.Set("function", attr_value);
-
- return launch_op;
+ op->operation.MutableAttrs()->Set(
+ attr_name, tensorflow::gtl::ArraySlice(
+ funcs.get(), num_values));
}
-#endif // TENSORFLOW_EAGER_USE_XLA
-
-} // namespace
-
-extern "C" {
void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals,
TF_Status* status) {
- TFE_Context* ctx = op->ctx;
- status->status = ctx->context.GetStatus();
+ tensorflow::gtl::InlinedVector<tensorflow::TensorHandle*, 2> handle_retvals(
+ *num_retvals);
+ status->status =
+ tensorflow::EagerExecute(&op->operation, &handle_retvals, num_retvals);
if (!status->status.ok()) {
return;
}
-#ifdef TENSORFLOW_EAGER_USE_XLA
- std::unique_ptr<TFE_Op> xla_launch_op;
- if (op->use_xla && op->name != "_XlaLaunch") {
- xla_launch_op = BuildXlaLaunch(op, status);
- if (!status->status.ok()) {
- return;
- }
- op = xla_launch_op.get();
- }
-#endif // TENSORFLOW_EAGER_USE_XLA
- // Ensure all resource-touching ops run in the device the resource is,
- // regardless of anything else that has been specified. This is identical to
- // the graph mode behavior.
- for (int i = 0; i < op->inputs.size(); ++i) {
- tensorflow::Device* input_op_device = nullptr;
- status->status = op->inputs[i]->OpDevice(&input_op_device);
- if (!status->status.ok()) return;
- VLOG(2) << "for op " << op->name << " input " << i << " "
- << tensorflow::DataTypeString(op->inputs[i]->dtype) << " "
- << (input_op_device == nullptr ? "cpu" : input_op_device->name())
- << " " << (op->device == nullptr ? "cpu" : op->device->name());
- if (op->inputs[i]->dtype == tensorflow::DT_RESOURCE &&
- (input_op_device != op->device || input_op_device == nullptr)) {
- tensorflow::Device* d =
- input_op_device == nullptr ? ctx->context.HostCPU() : input_op_device;
- VLOG(1) << "Changing device of operation " << op->name << " to "
- << d->name() << " because input #" << i
- << " is a resource in this device.";
- op->device = d;
- }
- }
- tensorflow::Device* device = op->device;
-
- tensorflow::Fprint128 cache_key =
- op->attrs.CacheKey(device == nullptr ? "unspecified" : device->name());
- tensorflow::KernelAndDevice* kernel = ctx->context.GetCachedKernel(cache_key);
- if (kernel == nullptr) {
- const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef();
- if (device == nullptr) {
- device = SelectDevice(ndef, ctx, status);
- if (!status->status.ok()) {
- return;
- }
- }
- CHECK(device != nullptr);
- if (ctx->context.LogDevicePlacement()) {
- LOG(INFO) << "Executing op " << ndef.op() << " in device "
- << device->name();
- }
- kernel = new tensorflow::KernelAndDevice(ctx->context.GetRendezvous());
- // Knowledge of the implementation of Init (and in-turn
- // FunctionLibraryRuntime::CreateKernel) tells us that ctx->func_lib_def
- // will be accessed, so grab on to the lock.
- // See WARNING comment in Execute (before kernel->Run) - would be nice to
- // rework to avoid this subtlety.
- tensorflow::tf_shared_lock l(*ctx->context.FunctionsMu());
- status->status = tensorflow::KernelAndDevice::Init(
- ndef, ctx->context.func_lib(device), kernel);
- if (!status->status.ok()) {
- delete kernel;
- return;
- }
- // Update output_dtypes inside `kernel`.
- const tensorflow::OpDef* op_def = nullptr;
- const tensorflow::FunctionDef* function_def =
- ctx->context.FuncLibDef()->Find(ndef.op());
- if (function_def != nullptr) {
- op_def = &(function_def->signature());
- }
- if (op_def == nullptr) {
- status->status = OpDefForOp(ndef.op().c_str(), &op_def);
- if (!status->status.ok()) {
- return;
- }
- }
- tensorflow::DataTypeVector input_dtypes;
- status->status = InOutTypesForNode(ndef, *op_def, &input_dtypes,
- kernel->mutable_output_dtypes());
- if (!status->status.ok()) {
- return;
- }
- ctx->context.AddKernelToCache(cache_key, kernel);
- }
- const tensorflow::DataTypeVector& output_dtypes = kernel->output_dtypes();
- const int output_dtypes_size = output_dtypes.size();
- if (output_dtypes_size > *num_retvals) {
- TF_SetStatus(status, TF_INVALID_ARGUMENT,
- tensorflow::strings::StrCat("Expecting ", output_dtypes.size(),
- " outputs, but *num_retvals is ",
- *num_retvals)
- .c_str());
- return;
- }
- *num_retvals = output_dtypes_size;
- if (device == nullptr) {
- // TODO(apassos) debug how the assignment below might return a different
- // device from the one requested above.
- device = kernel->device();
- }
- status->status = ValidateInputTypeAndPlacement(
- &ctx->context, device, op, kernel->kernel(),
- ctx->context.ShouldStoreMetadata() ? ctx->context.RunMetadataProto()
- : nullptr);
- if (!status->status.ok()) return;
- std::unique_ptr<tensorflow::NodeExecStats> maybe_stats;
- if (ctx->context.ShouldStoreMetadata()) {
- maybe_stats.reset(new tensorflow::NodeExecStats);
- maybe_stats->set_node_name(op->name);
- maybe_stats->set_all_start_micros(tensorflow::Env::Default()->NowMicros());
- maybe_stats->set_op_start_rel_micros(0);
- maybe_stats->set_scheduled_micros(tensorflow::Env::Default()->NowMicros());
- // TODO(apassos) track referenced tensors
- }
- if (ctx->context.Async()) {
- // Note that for async mode, execution order will make sure that all
- // input handles are ready before executing them.
- // TODO(agarwal): Consider executing "cheap" kernels inline for performance.
- tensorflow::gtl::InlinedVector<tensorflow::TensorHandle*, 2> handle_retvals(
- *num_retvals);
- tensorflow::uint64 id = op->ctx->context.NextId();
- for (int i = 0; i < *num_retvals; ++i) {
- tensorflow::TensorHandle* h =
- new tensorflow::TensorHandle(id, output_dtypes[i], &op->ctx->context);
- retvals[i] = new TFE_TensorHandle(h);
- handle_retvals[i] = h;
- }
- tensorflow::EagerNode* node = new tensorflow::ExecuteNode(
- id, &op->ctx->context, op->device, op->inputs, kernel,
- maybe_stats.release(), output_dtypes, handle_retvals);
- ctx->context.ExecutorAdd(node);
- } else {
- // Execute checks if retvals[i] is nullptr or not to figure if it needs to
- // allocate it.
- std::vector<tensorflow::TensorHandle*> handle_retvals(*num_retvals,
- nullptr);
- status->status = tensorflow::EagerExecute(
- &op->ctx->context, op->device, op->inputs, kernel, maybe_stats.get(),
- handle_retvals.data(), *num_retvals);
- for (int i = 0; i < *num_retvals; ++i) {
- retvals[i] = new TFE_TensorHandle(handle_retvals[i]);
- }
+ for (int i = 0; i < *num_retvals; ++i) {
+ retvals[i] = new TFE_TensorHandle(handle_retvals[i]);
}
}
@@ -1088,10 +572,3 @@ void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op,
}
}
} // namespace tensorflow
-
-
-TFE_Op::~TFE_Op() {
- for (tensorflow::TensorHandle* h : inputs) {
- h->Unref();
- }
-}
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
index 3926c22ce1f9e194b1452c796c83944d10cfdc64..c06ce84a8c578aa60dd626c24bd58098b78ae750 100644
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -30,7 +30,7 @@ limitations under the License.
#ifdef SWIG
#define TF_CAPI_EXPORT
#else
-#if defined(COMPILER_MSVC)
+#if defined(_WIN32)
#ifdef TF_COMPILE_LIBRARY
#define TF_CAPI_EXPORT __declspec(dllexport)
#else
@@ -38,7 +38,7 @@ limitations under the License.
#endif // TF_COMPILE_LIBRARY
#else
#define TF_CAPI_EXPORT __attribute__((visibility("default")))
-#endif // COMPILER_MSVC
+#endif // _WIN32
#endif // SWIG
#ifdef __cplusplus
diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h
index 05dc64f521735f944559392f470a37590e93f17c..49e1aab1cef9577256d9b081858cf094c788caf8 100644
--- a/tensorflow/c/eager/c_api_internal.h
+++ b/tensorflow/c/eager/c_api_internal.h
@@ -32,6 +32,7 @@ limitations under the License.
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/common_runtime/eager/context.h"
#include "tensorflow/core/common_runtime/eager/eager_executor.h"
+#include "tensorflow/core/common_runtime/eager/eager_operation.h"
#include "tensorflow/core/common_runtime/eager/kernel_and_device.h"
#include "tensorflow/core/common_runtime/eager/tensor_handle.h"
#include "tensorflow/core/common_runtime/function.h"
@@ -45,7 +46,6 @@ limitations under the License.
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/public/version.h"
-
struct TFE_ContextOptions {
TF_SessionOptions session_options;
// true if async execution is enabled.
@@ -85,19 +85,9 @@ struct TFE_Op {
// t is NULL iff the TFE_Op corresponds to a TensorFlow function instead of a
// primitive operation.
TFE_Op(TFE_Context* ctx, const char* op, const tensorflow::AttrTypeMap* t)
- : ctx(ctx), name(op), attrs(op), attr_types(t), device(nullptr) {}
-
- ~TFE_Op();
-
- bool const is_function() const { return attr_types == nullptr; }
+ : operation(&ctx->context, op, t) {}
- TFE_Context* ctx; // Must outlive the TFE_Op.
- const tensorflow::string name;
- tensorflow::AttrBuilder attrs;
- const tensorflow::AttrTypeMap* attr_types;
- tensorflow::gtl::InlinedVector inputs;
- tensorflow::Device* device;
- bool use_xla = false;
+ tensorflow::EagerOperation operation;
};
namespace tensorflow {
diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc
index abe2793ce894ad07c252575c5d55d98342916eac..e6c51ab17a867a0697f15d7683d8ca52c062035d 100644
--- a/tensorflow/c/eager/runtime.cc
+++ b/tensorflow/c/eager/runtime.cc
@@ -184,8 +184,7 @@ void CombineUnordered(const tensorflow::Fprint128& a,
inline tensorflow::Fprint128 CacheKeyHelper(StringPiece s,
const tensorflow::Fprint128& b) {
- // TODO(agarwal): avoid ToString().
- tensorflow::Fprint128 a = tensorflow::Fingerprint128(s.ToString());
+ tensorflow::Fprint128 a = tensorflow::Fingerprint128(s);
return FingerprintCat128(a, b);
}
@@ -213,10 +212,8 @@ tensorflow::Fprint128 AttrBuilder::CacheKey(const string& device) const {
if (node_def_finalized_) return f;
}
for (const auto& p : string_attrs_) {
- // TODO(agarwal): avoid ToString().
- CombineUnordered(CacheKeyHelper(p.first, tensorflow::Fingerprint128(
- p.second.ToString())),
- &f);
+ CombineUnordered(
+ CacheKeyHelper(p.first, tensorflow::Fingerprint128(p.second)), &f);
}
for (const auto& p : int_attrs_) {
CombineUnordered(CacheKeyHelper(p.first, static_cast(p.second)),
diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 97c323b87228039ba10f4ed5e434aa83621b1220..e9ed3395c448305bcd6317b0b292b4e4e0b659b1 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -130,13 +130,15 @@ class GradientTape {
}
}
- bool ShouldRecord(gtl::ArraySlice tensor_ids);
+ bool ShouldRecord(gtl::ArraySlice tensor_ids,
+ gtl::ArraySlice dtypes);
void Watch(int64 tensor_id);
void RecordOperation(const string& op_type,
gtl::ArraySlice output_tensors,
gtl::ArraySlice input_tensor_id,
+ gtl::ArraySlice input_dtypes,
BackwardFunction* backward_function,
const std::function& backward_function_deleter);
@@ -170,12 +172,30 @@ class GradientTape {
// Template instantiations here
+// Returns true iff `dtype` is one of the dtypes treated as trainable when
+// deciding whether to record an operation on the tape: DT_HALF, DT_BFLOAT16,
+// DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_COMPLEX128, DT_RESOURCE or
+// DT_VARIANT. Every other dtype yields false.
+inline bool IsDtypeTrainable(DataType dtype) {
+  // Real-valued floating-point dtypes.
+  const bool is_real_float = dtype == DT_HALF || dtype == DT_BFLOAT16 ||
+                             dtype == DT_FLOAT || dtype == DT_DOUBLE;
+  // Complex-valued dtypes.
+  const bool is_complex = dtype == DT_COMPLEX64 || dtype == DT_COMPLEX128;
+  // Resource and variant dtypes.
+  const bool is_resource_or_variant =
+      dtype == DT_RESOURCE || dtype == DT_VARIANT;
+  return is_real_float || is_complex || is_resource_or_variant;
+}
+
template
bool GradientTape::ShouldRecord(
- gtl::ArraySlice tensor_ids) {
- for (int64 i : tensor_ids) {
- if (tensor_tape_.find(i) != tensor_tape_.end()) {
- return true;
+    gtl::ArraySlice tensor_ids,
+    gtl::ArraySlice dtypes) {
+  CHECK_EQ(tensor_ids.size(), dtypes.size());  // One dtype per tensor id.
+  for (int i = 0; i < tensor_ids.size(); ++i) {
+    if (tensor_tape_.find(tensor_ids[i]) != tensor_tape_.end()) {
+      if (IsDtypeTrainable(dtypes[i])) return true;  // Else keep scanning.
}
}
return false;
@@ -189,9 +209,11 @@ void GradientTape::Watch(int64 tensor_id) {
template
void GradientTape::RecordOperation(
const string& op_type, gtl::ArraySlice output_tensors,
- gtl::ArraySlice input_tensor_id, BackwardFunction* backward_function,
+ gtl::ArraySlice input_tensor_id,
+ gtl::ArraySlice input_dtypes,
+ BackwardFunction* backward_function,
const std::function& backward_function_deleter) {
- if (!ShouldRecord(input_tensor_id)) {
+ if (!ShouldRecord(input_tensor_id, input_dtypes)) {
backward_function_deleter();
return;
}
@@ -380,49 +402,39 @@ Status InitialGradients(const VSpace& vspace,
gtl::ArraySlice output_gradients,
const TensorTape& tensor_tape,
const OpTape& op_tape,
- const gtl::FlatMap& tensor_usage_counts,
gtl::FlatMap>* result) {
for (int i = 0; i < target_tensor_ids.size(); ++i) {
const int64 id = target_tensor_ids[i];
- if (tensor_usage_counts.find(id) != tensor_usage_counts.end()) {
- if (!output_gradients.empty() && output_gradients[i] != nullptr) {
- // TODO(apassos) figure out how to print debugging information here.
- return errors::InvalidArgument(
- "A gradient was provided for a tensor which is used as part of the "
- "computation.");
- }
- } else {
- if (output_gradients.empty() || output_gradients[i] == nullptr) {
- auto tensor_it = tensor_tape.find(id);
- if (tensor_it != tensor_tape.end() && tensor_it->second != -1) {
- auto op_it = op_tape.find(tensor_it->second);
- if (op_it == op_tape.end()) {
- return errors::Internal(
- "Internal state of the gradient tape is invalid: "
- "failed to find operation producing a tensor");
- }
- bool found = false;
- for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) {
- if (op_it->second.output_tensor_info[j].id == id) {
- found = true;
- (*result)[id].push_back(
- vspace.Ones(op_it->second.output_tensor_info[j].shape,
- op_it->second.output_tensor_info[j].dtype));
- break;
- }
- }
- if (!found) {
- return errors::Internal(
- "Internal state of the gradient tape is invalid: "
- "none of operations outputs match expected tensor");
+ if (output_gradients.empty() || output_gradients[i] == nullptr) {
+ auto tensor_it = tensor_tape.find(id);
+ if (tensor_it != tensor_tape.end() && tensor_it->second != -1) {
+ auto op_it = op_tape.find(tensor_it->second);
+ if (op_it == op_tape.end()) {
+ return errors::Internal(
+ "Internal state of the gradient tape is invalid: "
+ "failed to find operation producing a tensor");
+ }
+ bool found = false;
+ for (int j = 0; j < op_it->second.output_tensor_info.size(); ++j) {
+ if (op_it->second.output_tensor_info[j].id == id) {
+ found = true;
+ (*result)[id].push_back(
+ vspace.Ones(op_it->second.output_tensor_info[j].shape,
+ op_it->second.output_tensor_info[j].dtype));
+ break;
}
- } else {
- // No record of the target tensor found on the tape, so no gradient
- // needs to be computed from it. Do nothing.
+ }
+ if (!found) {
+ return errors::Internal(
+ "Internal state of the gradient tape is invalid: "
+ "none of operations outputs match expected tensor");
}
} else {
- (*result)[id].push_back(output_gradients[i]);
+ // No record of the target tensor found on the tape, so no gradient
+ // needs to be computed from it. Do nothing.
}
+ } else {
+ (*result)[id].push_back(output_gradients[i]);
}
}
return Status::OK();
@@ -451,8 +463,7 @@ Status GradientTape::ComputeGradient(
InitialStack(state.op_tape, state.op_missing_tensor);
gtl::FlatMap> gradients;
Status s = InitialGradients(vspace, target_tensor_ids, output_gradients,
- tensor_tape_, state.op_tape,
- state.tensor_usage_counts, &gradients);
+ tensor_tape_, state.op_tape, &gradients);
auto cleanup = [this, &state]() {
if (!persistent_) {
// Release all backprop functions
diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc
index 93155998b86d59ec78c7ff25f146b8e3c8eac380..e18fdf6c57bd3f432d8cb73536fb816df90b3963 100644
--- a/tensorflow/c/python_api.cc
+++ b/tensorflow/c/python_api.cc
@@ -110,7 +110,7 @@ void ExtendSession(TF_Session* session, TF_Status* status) {
session->extend_before_run = false;
}
-std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) {
+std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) {
Node* node = &output.oper->node;
CppShapeInferenceResult::HandleData handle_data;
handle_data.set_is_set(true);
@@ -135,4 +135,30 @@ std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output) {
return result;
}
+// Sets the handle shapes and types of `output` to those parsed from `proto`.
+void SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output,
+                                   const void* proto, size_t proto_len,
+                                   TF_Status* status) {
+  tensorflow::CppShapeInferenceResult::HandleData handle_data;
+  if (!handle_data.ParseFromArray(proto, proto_len)) {
+    status->status = tensorflow::errors::InvalidArgument(
+        "Couldn't deserialize HandleData proto");
+    return;
+  }
+  DCHECK(handle_data.is_set());
+  // Hold the graph lock while reading and updating its shape refiner.
+  tensorflow::mutex_lock l(graph->mu);
+  tensorflow::shape_inference::InferenceContext* ic =
+      graph->refiner.GetContext(&output.oper->node);
+  std::vector shapes_and_types;
+  for (const auto& shape_and_type_proto : handle_data.shape_and_type()) {
+    tensorflow::shape_inference::ShapeHandle shape;
+    status->status =
+        ic->MakeShapeFromShapeProto(shape_and_type_proto.shape(), &shape);
+    if (!status->status.ok()) return;  // Error stays in `status`.
+    shapes_and_types.emplace_back(shape, shape_and_type_proto.dtype());
+  }
+  ic->set_output_handle_shapes_and_types(output.index, shapes_and_types);
+}
+
} // namespace tensorflow
diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h
index 2d4c8cd9ed7bc926f448dab1f6b50ed74179ea14..4bcb5bde62c8a4df4e68c1ce0daaf459434ceb5d 100644
--- a/tensorflow/c/python_api.h
+++ b/tensorflow/c/python_api.h
@@ -55,9 +55,15 @@ void ExtendSession(TF_Session* session, TF_Status* status);
// Returns the serialized CppShapeInferenceResult::HandleData proto for
// `output` if its a resource tensor, or otherwise returns the empty string.
-// TODO(b/74620627): remove when _USE_C_SHAPES is removed
-std::string ResourceHandleShapeAndType(TF_Graph* graph, TF_Output output);
-
+std::string GetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output);
+
+// Sets the resource handle shapes and types of `output` from `proto`, which
+// should be a serialized CppShapeInferenceResult::HandleData proto.
+// NOTE(skyewm): `proto` is passed as a void*/size_t pair instead of a
+// std::string because I couldn't get SWIG to work otherwise.
+void SetResourceHandleShapeAndType(TF_Graph* graph, TF_Output output,
+ const void* proto, size_t proto_len,
+ TF_Status* status);
} // namespace tensorflow
#endif // TENSORFLOW_C_PYTHON_API_H_
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index d73121c7b701ec06c03836d1a765f4b35d88fe92..d6a4f141b6bb8ccadb77f1fa83b5fb742d78f70f 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -440,7 +440,7 @@ string AvoidCPPKeywords(StringPiece name) {
if (IsCPPKeyword(name)) {
return strings::StrCat(name, "_");
}
- return name.ToString();
+ return std::string(name);
}
void InferArgAttributes(const OpDef::ArgDef& arg,
diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc
index c143b978338815ebc7134eb0a07867c5d8b13dca..62a889181e787f2e181135ab0563c45e1bab8812 100644
--- a/tensorflow/cc/framework/scope.cc
+++ b/tensorflow/cc/framework/scope.cc
@@ -220,7 +220,7 @@ std::unordered_set Scope::Impl::GetColocationConstraints(
for (const string& entry : node_constraints) {
StringPiece s(entry);
if (str_util::ConsumePrefix(&s, kColocationGroupPrefix)) {
- current_constraints.insert(s.ToString());
+ current_constraints.insert(std::string(s));
}
}
} else {
diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc
index 6545e4ee3eb406436937a43ddac66d017af8e108..ff348fadb24e29a83bd6c8853aa67931f6df4182 100644
--- a/tensorflow/cc/gradients/array_grad.cc
+++ b/tensorflow/cc/gradients/array_grad.cc
@@ -385,6 +385,42 @@ Status MirrorPadGradGrad(const Scope& scope, const Operation& op,
}
REGISTER_GRADIENT_OP("MirrorPadGrad", MirrorPadGradGrad);
+Status StridedSliceGradHelper(const Scope& scope, const Operation& op,
+ const std::vector