From 2c858368c8c4b7e82c8d134786026a62a72d2676 Mon Sep 17 00:00:00 2001 From: Randy West Date: Mon, 18 Dec 2017 18:22:03 -0500 Subject: [PATCH 0001/2038] Compute test accuracy in batches to avoid OOM on GPUs. Reported here: https://github.com/tensorflow/tensorflow/issues/136 Alternative to this for mnist_deep.py: https://github.com/tensorflow/tensorflow/pull/157 --- tensorflow/examples/tutorials/mnist/mnist_deep.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorflow/examples/tutorials/mnist/mnist_deep.py b/tensorflow/examples/tutorials/mnist/mnist_deep.py index 1e0294db27..2699738735 100644 --- a/tensorflow/examples/tutorials/mnist/mnist_deep.py +++ b/tensorflow/examples/tutorials/mnist/mnist_deep.py @@ -34,6 +34,8 @@ from tensorflow.examples.tutorials.mnist import input_data import tensorflow as tf +import numpy + FLAGS = None @@ -164,8 +166,13 @@ def main(_): print('step %d, training accuracy %g' % (i, train_accuracy)) train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5}) - print('test accuracy %g' % accuracy.eval(feed_dict={ - x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})) + # compute in batches to avoid OOM on GPUs + accuracy_l = [] + for i in range(50): + batch = mnist.test.next_batch(500, shuffle=False) + accuracy_l.append(accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})) + print('test accuracy %g' % numpy.mean(accuracy_l)) + if __name__ == '__main__': parser = argparse.ArgumentParser() -- GitLab From 3f18817317940253e6ec0e6b412492c5add5927b Mon Sep 17 00:00:00 2001 From: Randy West Date: Mon, 18 Dec 2017 23:18:30 -0500 Subject: [PATCH 0002/2038] Fix basic arithmetic fail + make loop pythonic --- tensorflow/examples/tutorials/mnist/mnist_deep.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/tutorials/mnist/mnist_deep.py b/tensorflow/examples/tutorials/mnist/mnist_deep.py index 2699738735..47d2777813 100644 --- 
a/tensorflow/examples/tutorials/mnist/mnist_deep.py +++ b/tensorflow/examples/tutorials/mnist/mnist_deep.py @@ -168,7 +168,7 @@ def main(_): # compute in batches to avoid OOM on GPUs accuracy_l = [] - for i in range(50): + for _ in range(20): batch = mnist.test.next_batch(500, shuffle=False) accuracy_l.append(accuracy.eval(feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})) print('test accuracy %g' % numpy.mean(accuracy_l)) -- GitLab From 67dee0adc09534483ce2627ffee629feb5133ae7 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Fri, 6 Apr 2018 03:26:26 +0800 Subject: [PATCH 0003/2038] Fix math equation rendering format in api definitions --- tensorflow/core/api_def/base_api/api_def_Exp.pbtxt | 2 +- .../core/api_def/base_api/api_def_GatherNd.pbtxt | 2 +- .../api_def/base_api/api_def_MatrixExponential.pbtxt | 2 +- .../api_def/base_api/api_def_MatrixLogarithm.pbtxt | 2 +- .../core/api_def/base_api/api_def_Polygamma.pbtxt | 2 +- .../core/api_def/base_api/api_def_ReduceJoin.pbtxt | 2 +- .../core/api_def/base_api/api_def_ScatterNdAdd.pbtxt | 4 ++-- .../base_api/api_def_ScatterNdNonAliasingAdd.pbtxt | 4 ++-- .../core/api_def/base_api/api_def_ScatterNdSub.pbtxt | 4 ++-- .../api_def/base_api/api_def_ScatterNdUpdate.pbtxt | 4 ++-- tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt | 2 +- .../api_def/base_api/api_def_SparseApplyAdagrad.pbtxt | 4 ++-- .../base_api/api_def_SparseApplyCenteredRMSProp.pbtxt | 6 +++--- .../api_def/base_api/api_def_SparseApplyFtrl.pbtxt | 10 +++++----- .../api_def/base_api/api_def_SparseApplyMomentum.pbtxt | 4 ++-- .../base_api/api_def_SparseApplyProximalAdagrad.pbtxt | 8 ++++---- .../api_def_SparseApplyProximalGradientDescent.pbtxt | 4 ++-- .../api_def/base_api/api_def_SparseApplyRMSProp.pbtxt | 6 +++--- .../api_def/base_api/api_def_UnsortedSegmentSum.pbtxt | 2 +- tensorflow/core/api_def/base_api/api_def_Zeta.pbtxt | 2 +- 20 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt 
b/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt index dd1e3d5dfc..01ac3d433a 100644 --- a/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt @@ -1,4 +1,4 @@ op { graph_op_name: "Exp" - summary: "Computes exponential of x element-wise. \\\\(y = e^x\\\\)." + summary: "Computes exponential of x element-wise. \\(y = e^x\\)." } diff --git a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt index 6cd76ff340..342a1f6b05 100644 --- a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt @@ -25,7 +25,7 @@ END (K-1)-dimensional tensor of indices into `params`, where each element defines a slice of `params`: - output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]] + output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]] Whereas in @{tf.gather} `indices` defines slices into the first dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt index 0d680f6531..d7b56aec87 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt @@ -18,7 +18,7 @@ END } summary: "Computes the matrix exponential of one or more square matrices:" description: < l1 else 0.0 -accum = accum_new +$$accum_{new} = accum + grad * grad$$ +$$linear += grad + (accum_{new}^{-lr_{power}} - accum^{-lr_{power}}) / lr * var$$ +$$quadratic = 1.0 / (accum_{new}^{lr_{power}} * lr) + 2 * l2$$ +$$var = (sign(linear) * l1 - linear) / quadratic\ if\ |linear| > l1\ else\ 0.0$$ +$$accum = accum_{new}$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt index 
8d9ac9ea3f..17dbb488de 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt @@ -64,7 +64,7 @@ Set use_nesterov = True if you want to use Nesterov momentum. That is for rows we have grad for, we update var and accum as follows: -accum = accum * momentum + grad -var -= lr * accum +$$accum = accum * momentum + grad$$ +$$var -= lr * accum$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt index 80541b91c7..0b24f2ddd1 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt @@ -58,9 +58,9 @@ END summary: "Sparse update entries in \'*var\' and \'*accum\' according to FOBOS algorithm." description: < Date: Tue, 10 Apr 2018 21:10:51 +0800 Subject: [PATCH 0004/2038] Remove breaking ``` for math equations --- tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt | 2 -- .../api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt | 4 +--- tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt | 4 +--- .../core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt | 4 +--- .../core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt | 2 +- 5 files changed, 4 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt index ee0578c2ec..a9a7646314 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt @@ -50,9 +50,7 @@ dimension of `ref`. `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` $$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$ -``` For example, say we want to add 4 scattered elements to a rank-1 tensor to 8 elements. 
In Python, that addition would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt index 1e4f99006a..35116e5f6a 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt @@ -37,7 +37,7 @@ respect to both `input` and `updates`. `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. `indices` must be integer tensor, containing indices into `input`. -It must be shape `\\([d_0, ..., d_{Q-2}, K]\\)` where `0 < K <= P`. +It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. The innermost dimension of `indices` (with length `K`) corresponds to indices into elements (if `K = P`) or `(P-K)`-dimensional slices @@ -45,9 +45,7 @@ indices into elements (if `K = P`) or `(P-K)`-dimensional slices `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` $$[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].$$ -``` For example, say we want to add 4 scattered elements to a rank-1 tensor to 8 elements. In Python, that addition would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt index e8fdd71785..99e5c4908b 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt @@ -42,7 +42,7 @@ within a given variable according to `indices`. `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. `indices` must be integer tensor, containing indices into `ref`. -It must be shape `\\([d_0, ..., d_{Q-2}, K]\\)` where `0 < K <= P`. +It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. 
The innermost dimension of `indices` (with length `K`) corresponds to indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th @@ -50,9 +50,7 @@ dimension of `ref`. `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` $$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$ -``` For example, say we want to subtract 4 scattered elements from a rank-1 tensor with 8 elements. In Python, that subtraction would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt index 556a5d559b..cb57c171b9 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt @@ -42,7 +42,7 @@ variable according to `indices`. `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. `indices` must be integer tensor, containing indices into `ref`. -It must be shape `\\([d_0, ..., d_{Q-2}, K]\\)` where `0 < K <= P`. +It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. The innermost dimension of `indices` (with length `K`) corresponds to indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th @@ -50,9 +50,7 @@ dimension of `ref`. `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` $$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$ -``` For example, say we want to update 4 scattered elements to a rank-1 tensor to 8 elements. In Python, that update would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt index ac1499346c..9aeabd030d 100644 --- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt @@ -20,7 +20,7 @@ Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of segments. 
Computes a tensor such that -`\\(output[i] = sum_{j...} data[j...]\\)` where the sum is over tuples `j...` such +\\(output[i] = sum_{j...} data[j...]\\) where the sum is over tuples `j...` such that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` need not be sorted and need not cover all values in the full range of valid values. -- GitLab From 6583c9a693b122a49f17e7ec99463c6c3b7dbe98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 24 Apr 2018 20:22:24 +0800 Subject: [PATCH 0005/2038] CLN: move _safe_embedding_lookup_sparse to embedding_ops and make it public --- tensorflow/python/BUILD | 2 + .../python/feature_column/feature_column.py | 161 +----------------- tensorflow/python/ops/embedding_ops.py | 157 +++++++++++++++++ tensorflow/python/ops/nn.py | 1 + 4 files changed, 163 insertions(+), 158 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index bb32f4bbe0..6e2e546984 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1845,6 +1845,8 @@ py_library( ":math_ops", ":platform", ":resource_variable_ops", + ":sparse_ops", + ":tensor_shape", ":variables", ], ) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index c16c3cda48..f48634d0c7 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -2058,7 +2058,7 @@ def _create_categorical_column_weighted_sum(column, initializer=init_ops.zeros_initializer(), trainable=trainable, collections=weight_collections) - return _safe_embedding_lookup_sparse( + return embedding_ops.safe_embedding_lookup_sparse( weight, id_tensor, sparse_weights=weight_tensor, @@ -2479,7 +2479,7 @@ class _EmbeddingColumn( }) # Return embedding lookup result. 
- return _safe_embedding_lookup_sparse( + return embedding_ops.safe_embedding_lookup_sparse( embedding_weights=embedding_weights, sparse_ids=sparse_ids, sparse_weights=sparse_weights, @@ -2612,7 +2612,7 @@ class _SharedEmbeddingColumn( }) # Return embedding lookup result. - return _safe_embedding_lookup_sparse( + return embedding_ops.safe_embedding_lookup_sparse( embedding_weights=embedding_weights, sparse_ids=sparse_ids, sparse_weights=sparse_weights, @@ -3065,161 +3065,6 @@ def _collect_leaf_level_keys(cross): return leaf_level_keys -# TODO(zakaria): Move this to embedding_ops and make it public. -def _safe_embedding_lookup_sparse(embedding_weights, - sparse_ids, - sparse_weights=None, - combiner='mean', - default_id=None, - name=None, - partition_strategy='div', - max_norm=None): - """Lookup embedding results, accounting for invalid IDs and empty features. - - The partitioned embedding in `embedding_weights` must all be the same shape - except for the first dimension. The first dimension is allowed to vary as the - vocabulary size is not necessarily a multiple of `P`. `embedding_weights` - may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a - partitioner. - - Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs - with non-positive weight. For an entry with no features, the embedding vector - for `default_id` is returned, or the 0-vector if `default_id` is not supplied. - - The ids and weights may be multi-dimensional. Embeddings are always aggregated - along the last dimension. - - Args: - embedding_weights: A list of `P` float `Tensor`s or values representing - partitioned embedding `Tensor`s. Alternatively, a `PartitionedVariable` - created by partitioning along dimension 0. The total unpartitioned - shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the - vocab size and `e_1, ..., e_m` are the embedding dimensions. - sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the - ids. 
`d_0` is typically batch size. - sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing - float weights corresponding to `sparse_ids`, or `None` if all weights - are be assumed to be 1.0. - combiner: A string specifying how to combine embedding results for each - entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" - the default. - default_id: The id to use for an entry with no features. - name: A name for this operation (optional). - partition_strategy: A string specifying the partitioning strategy. - Currently `"div"` and `"mod"` are supported. Default is `"div"`. - max_norm: If not `None`, all embeddings are l2-normalized to max_norm before - combining. - - - Returns: - Dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`. - - Raises: - ValueError: if `embedding_weights` is empty. - """ - if embedding_weights is None: - raise ValueError('Missing embedding_weights %s.' % embedding_weights) - if isinstance(embedding_weights, variables.PartitionedVariable): - embedding_weights = list(embedding_weights) # get underlying Variables. - if not isinstance(embedding_weights, list): - embedding_weights = [embedding_weights] - if len(embedding_weights) < 1: - raise ValueError('Missing embedding_weights %s.' % embedding_weights) - - dtype = sparse_weights.dtype if sparse_weights is not None else None - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] - - with ops.name_scope(name, 'embedding_lookup', - embedding_weights + [sparse_ids, - sparse_weights]) as scope: - # Reshape higher-rank sparse ids and weights to linear segment ids. 
- original_shape = sparse_ids.dense_shape - original_rank_dim = sparse_ids.dense_shape.get_shape()[0] - original_rank = ( - array_ops.size(original_shape) - if original_rank_dim.value is None - else original_rank_dim.value) - sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [ - math_ops.reduce_prod( - array_ops.slice(original_shape, [0], [original_rank - 1])), - array_ops.gather(original_shape, original_rank - 1)]) - if sparse_weights is not None: - sparse_weights = sparse_tensor_lib.SparseTensor( - sparse_ids.indices, - sparse_weights.values, sparse_ids.dense_shape) - - # Prune invalid ids and weights. - sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) - if combiner != 'sum': - sparse_ids, sparse_weights = _prune_invalid_weights( - sparse_ids, sparse_weights) - - # Fill in dummy values for empty features, if necessary. - sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids, - default_id or - 0) - if sparse_weights is not None: - sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0) - - result = embedding_ops.embedding_lookup_sparse( - embedding_weights, - sparse_ids, - sparse_weights, - combiner=combiner, - partition_strategy=partition_strategy, - name=None if default_id is None else scope, - max_norm=max_norm) - - if default_id is None: - # Broadcast is_row_empty to the same shape as embedding_lookup_result, - # for use in Select. - is_row_empty = array_ops.tile( - array_ops.reshape(is_row_empty, [-1, 1]), - array_ops.stack([1, array_ops.shape(result)[1]])) - - result = array_ops.where(is_row_empty, - array_ops.zeros_like(result), - result, - name=scope) - - # Reshape back from linear ids back into higher-dimensional dense result. 
- final_result = array_ops.reshape( - result, - array_ops.concat([ - array_ops.slice( - math_ops.cast(original_shape, dtypes.int32), [0], - [original_rank - 1]), - array_ops.slice(array_ops.shape(result), [1], [-1]) - ], 0)) - final_result.set_shape(tensor_shape.unknown_shape( - (original_rank_dim - 1).value).concatenate(result.get_shape()[1:])) - return final_result - - -def _prune_invalid_ids(sparse_ids, sparse_weights): - """Prune invalid IDs (< 0) from the input ids and weights.""" - is_id_valid = math_ops.greater_equal(sparse_ids.values, 0) - if sparse_weights is not None: - is_id_valid = math_ops.logical_and( - is_id_valid, - array_ops.ones_like(sparse_weights.values, dtype=dtypes.bool)) - sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid) - if sparse_weights is not None: - sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid) - return sparse_ids, sparse_weights - - -def _prune_invalid_weights(sparse_ids, sparse_weights): - """Prune invalid weights (< 0) from the input ids and weights.""" - if sparse_weights is not None: - is_weights_valid = math_ops.greater(sparse_weights.values, 0) - sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_weights_valid) - sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_weights_valid) - return sparse_ids, sparse_weights - - class _IndicatorColumn(_DenseColumn, _SequenceDenseColumn, collections.namedtuple('_IndicatorColumn', ['categorical_column'])): diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 9e46739bc1..a8cfeca119 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -23,6 +23,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from 
tensorflow.python.ops import clip_ops # Imports gradient definitions. @@ -30,6 +31,7 @@ from tensorflow.python.ops import data_flow_grad # pylint: disable=unused-impor from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import tf_export @@ -480,3 +482,158 @@ def embedding_lookup_sparse(params, assert False, "Unrecognized combiner" return embeddings + + +@tf_export("nn.safe_embedding_lookup_sparse") +def safe_embedding_lookup_sparse(embedding_weights, + sparse_ids, + sparse_weights=None, + combiner='mean', + default_id=None, + name=None, + partition_strategy='div', + max_norm=None): + """Lookup embedding results, accounting for invalid IDs and empty features. + + The partitioned embedding in `embedding_weights` must all be the same shape + except for the first dimension. The first dimension is allowed to vary as the + vocabulary size is not necessarily a multiple of `P`. `embedding_weights` + may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a + partitioner. + + Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs + with non-positive weight. For an entry with no features, the embedding vector + for `default_id` is returned, or the 0-vector if `default_id` is not supplied. + + The ids and weights may be multi-dimensional. Embeddings are always aggregated + along the last dimension. + + Args: + embedding_weights: A list of `P` float `Tensor`s or values representing + partitioned embedding `Tensor`s. Alternatively, a `PartitionedVariable` + created by partitioning along dimension 0. The total unpartitioned + shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the + vocab size and `e_1, ..., e_m` are the embedding dimensions. 
+ sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the + ids. `d_0` is typically batch size. + sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing + float weights corresponding to `sparse_ids`, or `None` if all weights + are assumed to be 1.0. + combiner: A string specifying how to combine embedding results for each + entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" + the default. + default_id: The id to use for an entry with no features. + name: A name for this operation (optional). + partition_strategy: A string specifying the partitioning strategy. + Currently `"div"` and `"mod"` are supported. Default is `"div"`. + max_norm: If not `None`, all embeddings are l2-normalized to max_norm before + combining. + + + Returns: + Dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`. + + Raises: + ValueError: if `embedding_weights` is empty. + """ + if embedding_weights is None: + raise ValueError('Missing embedding_weights %s.' % embedding_weights) + if isinstance(embedding_weights, variables.PartitionedVariable): + embedding_weights = list(embedding_weights) # get underlying Variables. + if not isinstance(embedding_weights, list): + embedding_weights = [embedding_weights] + if len(embedding_weights) < 1: + raise ValueError('Missing embedding_weights %s.' % embedding_weights) + + dtype = sparse_weights.dtype if sparse_weights is not None else None + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] + + with ops.name_scope(name, 'embedding_lookup', + embedding_weights + [sparse_ids, + sparse_weights]) as scope: + # Reshape higher-rank sparse ids and weights to linear segment ids. 
+ original_shape = sparse_ids.dense_shape + original_rank_dim = sparse_ids.dense_shape.get_shape()[0] + original_rank = ( + array_ops.size(original_shape) + if original_rank_dim.value is None + else original_rank_dim.value) + sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [ + math_ops.reduce_prod( + array_ops.slice(original_shape, [0], [original_rank - 1])), + array_ops.gather(original_shape, original_rank - 1)]) + if sparse_weights is not None: + sparse_weights = sparse_tensor.SparseTensor( + sparse_ids.indices, + sparse_weights.values, sparse_ids.dense_shape) + + # Prune invalid ids and weights. + sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) + if combiner != 'sum': + sparse_ids, sparse_weights = _prune_invalid_weights( + sparse_ids, sparse_weights) + + # Fill in dummy values for empty features, if necessary. + sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids, + default_id or + 0) + if sparse_weights is not None: + sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0) + + result = embedding_lookup_sparse( + embedding_weights, + sparse_ids, + sparse_weights, + combiner=combiner, + partition_strategy=partition_strategy, + name=None if default_id is None else scope, + max_norm=max_norm) + + if default_id is None: + # Broadcast is_row_empty to the same shape as embedding_lookup_result, + # for use in Select. + is_row_empty = array_ops.tile( + array_ops.reshape(is_row_empty, [-1, 1]), + array_ops.stack([1, array_ops.shape(result)[1]])) + + result = array_ops.where(is_row_empty, + array_ops.zeros_like(result), + result, + name=scope) + + # Reshape back from linear ids back into higher-dimensional dense result. 
+ final_result = array_ops.reshape( + result, + array_ops.concat([ + array_ops.slice( + math_ops.cast(original_shape, dtypes.int32), [0], + [original_rank - 1]), + array_ops.slice(array_ops.shape(result), [1], [-1]) + ], 0)) + final_result.set_shape(tensor_shape.unknown_shape( + (original_rank_dim - 1).value).concatenate(result.get_shape()[1:])) + return final_result + + +def _prune_invalid_ids(sparse_ids, sparse_weights): + """Prune invalid IDs (< 0) from the input ids and weights.""" + is_id_valid = math_ops.greater_equal(sparse_ids.values, 0) + if sparse_weights is not None: + is_id_valid = math_ops.logical_and( + is_id_valid, + array_ops.ones_like(sparse_weights.values, dtype=dtypes.bool)) + sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid) + if sparse_weights is not None: + sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid) + return sparse_ids, sparse_weights + + +def _prune_invalid_weights(sparse_ids, sparse_weights): + """Prune invalid weights (<= 0) from the input ids and weights.""" + if sparse_weights is not None: + is_weights_valid = math_ops.greater(sparse_weights.values, 0) + sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_weights_valid) + sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_weights_valid) + return sparse_ids, sparse_weights diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index 1d0d9a52a1..fb896bf042 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -79,6 +79,7 @@ See the @{$python/nn} guide. 
@@weighted_cross_entropy_with_logits @@embedding_lookup @@embedding_lookup_sparse +@@safe_embedding_lookup_sparse @@dynamic_rnn @@bidirectional_dynamic_rnn @@raw_rnn -- GitLab From 608508c35a4b87a17b9f07364e6fbeae2fa948c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 24 Apr 2018 20:32:38 +0800 Subject: [PATCH 0006/2038] CLN: move the corresponding test case --- .../python/layers/embedding_ops_test.py | 1 - tensorflow/python/kernel_tests/BUILD | 1 + .../python/kernel_tests/embedding_ops_test.py | 218 ++++++++++++++++++ 3 files changed, 219 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index bf25144982..87f00f94a6 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -21,7 +21,6 @@ from __future__ import print_function import itertools import math -import sys import numpy as np diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index ba8f1fd3ca..2fbdde849b 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2717,6 +2717,7 @@ cuda_py_test( "//tensorflow/python:embedding_ops", "//tensorflow/python:framework", "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:init_ops", "//tensorflow/python:linalg_ops", "//tensorflow/python:math_ops", "//tensorflow/python:partitioned_variables", diff --git a/tensorflow/python/kernel_tests/embedding_ops_test.py b/tensorflow/python/kernel_tests/embedding_ops_test.py index e53ca1dcaa..55d75cb474 100644 --- a/tensorflow/python/kernel_tests/embedding_ops_test.py +++ b/tensorflow/python/kernel_tests/embedding_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import itertools +import math import numpy as np from 
six.moves import xrange # pylint: disable=redefined-builtin @@ -31,6 +32,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import init_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import partitioned_variables @@ -736,6 +738,222 @@ class EmbeddingLookupSparseTest(test.TestCase): x, sp_ids, sp_weights, combiner="mean") +class SafeEmbeddingLookupSparseTest(test.TestCase): + + def _random_weights(self, vocab_size=4, embed_dim=4, num_shards=1): + assert vocab_size > 0 + assert embed_dim > 0 + assert num_shards > 0 + assert num_shards <= vocab_size + + embedding_weights = partitioned_variables.create_partitioned_variables( + shape=[vocab_size, embed_dim], + slicing=[num_shards, 1], + initializer=init_ops.truncated_normal_initializer( + mean=0.0, stddev=1.0 / math.sqrt(vocab_size), dtype=dtypes.float32)) + for w in embedding_weights: + w.initializer.run() + embedding_weights = [w.eval() for w in embedding_weights] + return embedding_weights + + def _ids_and_weights_2d(self): + # Each row demonstrates a test case: + # Row 0: multiple valid ids, 1 invalid id, weighted mean + # Row 1: all ids are invalid (leaving no valid ids after pruning) + # Row 2: no ids to begin with + # Row 3: single id + # Row 4: all ids have <=0 weight + indices = [[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [4, 0], [4, 1]] + ids = [0, 1, -1, -1, 2, 0, 1] + weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] + shape = [5, 4] + + sparse_ids = sparse_tensor.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(ids, dtypes.int64), + constant_op.constant(shape, dtypes.int64)) + + sparse_weights = sparse_tensor.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(weights, dtypes.float32), + 
constant_op.constant(shape, dtypes.int64)) + + return sparse_ids, sparse_weights + + def _ids_and_weights_3d(self): + # Each (2-D) index demonstrates a test case: + # Index 0, 0: multiple valid ids, 1 invalid id, weighted mean + # Index 0, 1: all ids are invalid (leaving no valid ids after pruning) + # Index 0, 2: no ids to begin with + # Index 1, 0: single id + # Index 1, 1: all ids have <=0 weight + # Index 1, 2: no ids to begin with + indices = [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 1, 0], [1, 0, 0], [1, 1, 0], + [1, 1, 1]] + ids = [0, 1, -1, -1, 2, 0, 1] + weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] + shape = [2, 3, 4] + + sparse_ids = sparse_tensor.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(ids, dtypes.int64), + constant_op.constant(shape, dtypes.int64)) + + sparse_weights = sparse_tensor.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(weights, dtypes.float32), + constant_op.constant(shape, dtypes.int64)) + + return sparse_ids, sparse_weights + + def test_safe_embedding_lookup_sparse_return_zero_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / + 3.0, [0] * 4, [0] * 4, embedding_weights[0][2], [0] * 4]) + + def test_safe_embedding_lookup_sparse_return_special_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(1.0 * embedding_weights[0][0] + 2.0 * 
embedding_weights[0][1]) / + 3.0, embedding_weights[0][3], embedding_weights[0][3], + embedding_weights[0][2], embedding_weights[0][3]]) + + def test_safe_embedding_lookup_sparse_no_weights(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, _ = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, + [0] * 4, embedding_weights[0][2], ( + embedding_weights[0][0] + embedding_weights[0][1]) / 2.0]) + + def test_safe_embedding_lookup_sparse_partitioned(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, _ = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + embedding_weights = list(itertools.chain(*embedding_weights)) + self.assertAllClose(embedding_lookup_result, + [(embedding_weights[0] + embedding_weights[1]) / 2.0, + [0] * 4, [0] * 4, embedding_weights[2], + (embedding_weights[0] + embedding_weights[1]) / 2.0]) + + def test_safe_embedding_lookup_sparse_partitioned_inconsistent_weights(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + embedding_weights[1] = embedding_weights[1].astype(np.float64) + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids) + embedding_weights = [ + constant_op.constant(w, dtype=dtypes.float64) + for w in embedding_weights + ] + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids, sparse_weights) + + def test_safe_embedding_lookup_sparse_3d_return_zero_vector(self): + with self.test_session(): + embedding_weights = 
self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights).eval()) + + self.assertAllClose(embedding_lookup_result, [[ + (1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / 3.0, + [0] * 4, [0] * 4 + ], [embedding_weights[0][2], [0] * 4, [0] * 4]]) + + def test_safe_embedding_lookup_sparse_3d_return_special_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) + + self.assertAllClose( + embedding_lookup_result, + [[(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / + 3.0, embedding_weights[0][3], embedding_weights[0][3]], [ + embedding_weights[0][2], embedding_weights[0][3], + embedding_weights[0][3] + ]]) + + def test_safe_embedding_lookup_sparse_3d_no_weights(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, _ = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + self.assertAllClose(embedding_lookup_result, [[( + embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, [ + 0 + ] * 4], [ + embedding_weights[0][2], + (embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4 + ]]) + + def test_safe_embedding_lookup_sparse_3d_partitioned(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, _ = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + embedding_weights = list(itertools.chain(*embedding_weights)) + self.assertAllClose(embedding_lookup_result, 
[[ + (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4, [0] * 4 + ], [ + embedding_weights[2], + (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4 + ]]) + + def test_safe_embedding_lookup_sparse_3d_partitioned_inconsistent_weights( + self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_weights[1] = embedding_weights[1].astype(np.float64) + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids) + embedding_weights = [ + constant_op.constant(w, dtype=dtypes.float64) + for w in embedding_weights + ] + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids, sparse_weights) + + class DynamicStitchOpTest(test.TestCase): def testCint32Cpu(self): -- GitLab From 067c85fb66345e61aee9428cd645cca786ed2bf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 24 Apr 2018 20:52:33 +0800 Subject: [PATCH 0007/2038] CLN: delete duplicate codes --- .../layers/python/layers/embedding_ops.py | 117 ++---------------- .../python/layers/embedding_ops_test.py | 4 +- 2 files changed, 11 insertions(+), 110 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index 49c3faf3b7..4353bf9c28 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -19,14 +19,12 @@ from __future__ import print_function from six.moves import xrange # pylint: disable=redefined-builtin -from tensorflow.contrib.framework.python.framework import tensor_util as contrib_tensor_util from tensorflow.contrib.layers.python.ops import sparse_feature_cross_op from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from 
tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops @@ -100,112 +98,15 @@ def safe_embedding_lookup_sparse(embedding_weights, logging.warn("The default value of combiner will change from \"mean\" " "to \"sqrtn\" after 2016/11/01.") combiner = "mean" - if embedding_weights is None: - raise ValueError("Missing embedding_weights %s." % embedding_weights) - if isinstance(embedding_weights, variables.PartitionedVariable): - embedding_weights = list(embedding_weights) # get underlying Variables. - if not isinstance(embedding_weights, list): - embedding_weights = [embedding_weights] - if len(embedding_weights) < 1: - raise ValueError("Missing embedding_weights %s." % embedding_weights) - - dtype = sparse_weights.dtype if sparse_weights is not None else None - if isinstance(embedding_weights, variables.PartitionedVariable): - embedding_weights = list(embedding_weights) - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] - - contrib_tensor_util.assert_same_float_dtype(embedding_weights + - [sparse_weights]) - - with ops.name_scope(name, "embedding_lookup", - embedding_weights + [sparse_ids, - sparse_weights]) as scope: - # Reshape higher-rank sparse ids and weights to linear segment ids. 
- original_shape = sparse_ids.dense_shape - original_rank_dim = sparse_ids.dense_shape.get_shape()[0] - original_rank = ( - array_ops.size(original_shape) - if original_rank_dim.value is None - else original_rank_dim.value) - sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [ - math_ops.reduce_prod( - array_ops.slice(original_shape, [0], [original_rank - 1])), - array_ops.gather(original_shape, original_rank - 1)]) - if sparse_weights is not None: - sparse_weights = sparse_tensor.SparseTensor( - sparse_ids.indices, - sparse_weights.values, sparse_ids.dense_shape) - - # Prune invalid ids and weights. - sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) - if combiner != "sum": - sparse_ids, sparse_weights = _prune_invalid_weights( - sparse_ids, sparse_weights) - - # Fill in dummy values for empty features, if necessary. - sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids, - default_id or - 0) - if sparse_weights is not None: - sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0) - - result = embedding_ops.embedding_lookup_sparse( - embedding_weights, - sparse_ids, - sparse_weights, - combiner=combiner, - partition_strategy=partition_strategy, - name=None if default_id is None else scope, - max_norm=max_norm) - - if default_id is None: - # Broadcast is_row_empty to the same shape as embedding_lookup_result, - # for use in Select. - is_row_empty = array_ops.tile( - array_ops.reshape(is_row_empty, [-1, 1]), - array_ops.stack([1, array_ops.shape(result)[1]])) - - result = array_ops.where(is_row_empty, - array_ops.zeros_like(result), - result, - name=scope) - - # Reshape back from linear ids back into higher-dimensional dense result. 
- final_result = array_ops.reshape( - result, - array_ops.concat([ - array_ops.slice( - math_ops.cast(original_shape, dtypes.int32), [0], - [original_rank - 1]), - array_ops.slice(array_ops.shape(result), [1], [-1]) - ], 0)) - final_result.set_shape(tensor_shape.unknown_shape( - (original_rank_dim - 1).value).concatenate(result.get_shape()[1:])) - return final_result - - -def _prune_invalid_ids(sparse_ids, sparse_weights): - """Prune invalid IDs (< 0) from the input ids and weights.""" - is_id_valid = math_ops.greater_equal(sparse_ids.values, 0) - if sparse_weights is not None: - is_id_valid = math_ops.logical_and( - is_id_valid, - array_ops.ones_like(sparse_weights.values, dtype=dtypes.bool)) - sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid) - if sparse_weights is not None: - sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid) - return sparse_ids, sparse_weights - - -def _prune_invalid_weights(sparse_ids, sparse_weights): - """Prune invalid weights (< 0) from the input ids and weights.""" - if sparse_weights is not None: - is_weights_valid = math_ops.greater(sparse_weights.values, 0) - sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_weights_valid) - sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_weights_valid) - return sparse_ids, sparse_weights + return embedding_ops.safe_embedding_lookup_sparse( + embedding_weights=embedding_weights, + sparse_ids=sparse_ids, + sparse_weights=sparse_weights, + combiner=combiner, + default_id=default_id, + name=name, + partition_strategy=partition_strategy, + max_norm=max_norm) def scattered_embedding_lookup(params, diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index 87f00f94a6..4d9849b4b1 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -168,7 +168,7 @@ class 
SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, sparse_weights = self._ids_and_weights_2d() embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, embedding_weights, sparse_ids) embedding_weights = [ constant_op.constant(w, dtype=dtypes.float64) @@ -245,7 +245,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, sparse_weights = self._ids_and_weights_3d() embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, embedding_weights, sparse_ids) embedding_weights = [ constant_op.constant(w, dtype=dtypes.float64) -- GitLab From 91ad552a52242b3d382eee6a3382c79be36b7df7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 24 Apr 2018 20:57:47 +0800 Subject: [PATCH 0008/2038] CLN: delete test cases in contrib --- .../python/layers/embedding_ops_test.py | 217 ------------------ 1 file changed, 217 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index 4d9849b4b1..f7b7ade39d 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -20,7 +20,6 @@ from __future__ import division from __future__ import print_function import itertools -import math import numpy as np @@ -39,222 +38,6 @@ from tensorflow.python.platform import test from tensorflow.python.util import compat -class SafeEmbeddingLookupSparseTest(test.TestCase): - - def _random_weights(self, vocab_size=4, embed_dim=4, num_shards=1): - assert vocab_size > 0 - assert embed_dim > 0 - assert num_shards > 0 - assert num_shards <= vocab_size - - 
embedding_weights = partitioned_variables.create_partitioned_variables( - shape=[vocab_size, embed_dim], - slicing=[num_shards, 1], - initializer=init_ops.truncated_normal_initializer( - mean=0.0, stddev=1.0 / math.sqrt(vocab_size), dtype=dtypes.float32)) - for w in embedding_weights: - w.initializer.run() - embedding_weights = [w.eval() for w in embedding_weights] - return embedding_weights - - def _ids_and_weights_2d(self): - # Each row demonstrates a test case: - # Row 0: multiple valid ids, 1 invalid id, weighted mean - # Row 1: all ids are invalid (leaving no valid ids after pruning) - # Row 2: no ids to begin with - # Row 3: single id - # Row 4: all ids have <=0 weight - indices = [[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [4, 0], [4, 1]] - ids = [0, 1, -1, -1, 2, 0, 1] - weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] - shape = [5, 4] - - sparse_ids = sparse_tensor_lib.SparseTensor( - constant_op.constant(indices, dtypes.int64), - constant_op.constant(ids, dtypes.int64), - constant_op.constant(shape, dtypes.int64)) - - sparse_weights = sparse_tensor_lib.SparseTensor( - constant_op.constant(indices, dtypes.int64), - constant_op.constant(weights, dtypes.float32), - constant_op.constant(shape, dtypes.int64)) - - return sparse_ids, sparse_weights - - def _ids_and_weights_3d(self): - # Each (2-D) index demonstrates a test case: - # Index 0, 0: multiple valid ids, 1 invalid id, weighted mean - # Index 0, 1: all ids are invalid (leaving no valid ids after pruning) - # Index 0, 2: no ids to begin with - # Index 1, 0: single id - # Index 1, 1: all ids have <=0 weight - # Index 1, 2: no ids to begin with - indices = [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 1, 0], [1, 0, 0], [1, 1, 0], - [1, 1, 1]] - ids = [0, 1, -1, -1, 2, 0, 1] - weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] - shape = [2, 3, 4] - - sparse_ids = sparse_tensor_lib.SparseTensor( - constant_op.constant(indices, dtypes.int64), - constant_op.constant(ids, dtypes.int64), - constant_op.constant(shape, 
dtypes.int64)) - - sparse_weights = sparse_tensor_lib.SparseTensor( - constant_op.constant(indices, dtypes.int64), - constant_op.constant(weights, dtypes.float32), - constant_op.constant(shape, dtypes.int64)) - - return sparse_ids, sparse_weights - - def test_safe_embedding_lookup_sparse_return_zero_vector(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, sparse_weights = self._ids_and_weights_2d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, sparse_weights).eval()) - - self.assertAllClose( - embedding_lookup_result, - [(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / - 3.0, [0] * 4, [0] * 4, embedding_weights[0][2], [0] * 4]) - - def test_safe_embedding_lookup_sparse_return_special_vector(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, sparse_weights = self._ids_and_weights_2d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) - - self.assertAllClose( - embedding_lookup_result, - [(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / - 3.0, embedding_weights[0][3], embedding_weights[0][3], - embedding_weights[0][2], embedding_weights[0][3]]) - - def test_safe_embedding_lookup_sparse_no_weights(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, _ = self._ids_and_weights_2d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, None).eval()) - - self.assertAllClose( - embedding_lookup_result, - [(embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, - [0] * 4, embedding_weights[0][2], ( - embedding_weights[0][0] + embedding_weights[0][1]) / 2.0]) - - def test_safe_embedding_lookup_sparse_partitioned(self): - with self.test_session(): - embedding_weights = 
self._random_weights(num_shards=3) - sparse_ids, _ = self._ids_and_weights_2d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, None).eval()) - - embedding_weights = list(itertools.chain(*embedding_weights)) - self.assertAllClose(embedding_lookup_result, - [(embedding_weights[0] + embedding_weights[1]) / 2.0, - [0] * 4, [0] * 4, embedding_weights[2], - (embedding_weights[0] + embedding_weights[1]) / 2.0]) - - def test_safe_embedding_lookup_sparse_partitioned_inconsistent_weights(self): - with self.test_session(): - embedding_weights = self._random_weights(num_shards=3) - sparse_ids, sparse_weights = self._ids_and_weights_2d() - - embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, - embedding_weights, sparse_ids) - embedding_weights = [ - constant_op.constant(w, dtype=dtypes.float64) - for w in embedding_weights - ] - self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, - embedding_weights, sparse_ids, sparse_weights) - - def test_safe_embedding_lookup_sparse_3d_return_zero_vector(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, sparse_weights = self._ids_and_weights_3d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, sparse_weights).eval()) - - self.assertAllClose(embedding_lookup_result, [[ - (1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / 3.0, - [0] * 4, [0] * 4 - ], [embedding_weights[0][2], [0] * 4, [0] * 4]]) - - def test_safe_embedding_lookup_sparse_3d_return_special_vector(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, sparse_weights = self._ids_and_weights_3d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) - - 
self.assertAllClose( - embedding_lookup_result, - [[(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / - 3.0, embedding_weights[0][3], embedding_weights[0][3]], [ - embedding_weights[0][2], embedding_weights[0][3], - embedding_weights[0][3] - ]]) - - def test_safe_embedding_lookup_sparse_3d_no_weights(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, _ = self._ids_and_weights_3d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, None).eval()) - - self.assertAllClose(embedding_lookup_result, [[( - embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, [ - 0 - ] * 4], [ - embedding_weights[0][2], - (embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4 - ]]) - - def test_safe_embedding_lookup_sparse_3d_partitioned(self): - with self.test_session(): - embedding_weights = self._random_weights(num_shards=3) - sparse_ids, _ = self._ids_and_weights_3d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, None).eval()) - - embedding_weights = list(itertools.chain(*embedding_weights)) - self.assertAllClose(embedding_lookup_result, [[ - (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4, [0] * 4 - ], [ - embedding_weights[2], - (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4 - ]]) - - def test_safe_embedding_lookup_sparse_3d_partitioned_inconsistent_weights( - self): - with self.test_session(): - embedding_weights = self._random_weights(num_shards=3) - sparse_ids, sparse_weights = self._ids_and_weights_3d() - - embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, - embedding_weights, sparse_ids) - embedding_weights = [ - constant_op.constant(w, dtype=dtypes.float64) - for w in embedding_weights - ] - self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, - 
embedding_weights, sparse_ids, sparse_weights) - - class ScatteredEmbeddingLookupTest(test.TestCase): def setUp(self): -- GitLab From 24a6350ad173865c16351825f251f2fde97b7d9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 26 Apr 2018 07:01:06 +0800 Subject: [PATCH 0009/2038] Revert "CLN: delete test cases in contrib" This reverts commit 91ad552a52242b3d382eee6a3382c79be36b7df7. --- .../python/layers/embedding_ops_test.py | 217 ++++++++++++++++++ 1 file changed, 217 insertions(+) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index f7b7ade39d..4d9849b4b1 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import itertools +import math import numpy as np @@ -38,6 +39,222 @@ from tensorflow.python.platform import test from tensorflow.python.util import compat +class SafeEmbeddingLookupSparseTest(test.TestCase): + + def _random_weights(self, vocab_size=4, embed_dim=4, num_shards=1): + assert vocab_size > 0 + assert embed_dim > 0 + assert num_shards > 0 + assert num_shards <= vocab_size + + embedding_weights = partitioned_variables.create_partitioned_variables( + shape=[vocab_size, embed_dim], + slicing=[num_shards, 1], + initializer=init_ops.truncated_normal_initializer( + mean=0.0, stddev=1.0 / math.sqrt(vocab_size), dtype=dtypes.float32)) + for w in embedding_weights: + w.initializer.run() + embedding_weights = [w.eval() for w in embedding_weights] + return embedding_weights + + def _ids_and_weights_2d(self): + # Each row demonstrates a test case: + # Row 0: multiple valid ids, 1 invalid id, weighted mean + # Row 1: all ids are invalid (leaving no valid ids after pruning) + # Row 2: no ids to begin with + # Row 3: single id + # Row 4: all ids have 
<=0 weight + indices = [[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [4, 0], [4, 1]] + ids = [0, 1, -1, -1, 2, 0, 1] + weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] + shape = [5, 4] + + sparse_ids = sparse_tensor_lib.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(ids, dtypes.int64), + constant_op.constant(shape, dtypes.int64)) + + sparse_weights = sparse_tensor_lib.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(weights, dtypes.float32), + constant_op.constant(shape, dtypes.int64)) + + return sparse_ids, sparse_weights + + def _ids_and_weights_3d(self): + # Each (2-D) index demonstrates a test case: + # Index 0, 0: multiple valid ids, 1 invalid id, weighted mean + # Index 0, 1: all ids are invalid (leaving no valid ids after pruning) + # Index 0, 2: no ids to begin with + # Index 1, 0: single id + # Index 1, 1: all ids have <=0 weight + # Index 1, 2: no ids to begin with + indices = [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 1, 0], [1, 0, 0], [1, 1, 0], + [1, 1, 1]] + ids = [0, 1, -1, -1, 2, 0, 1] + weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] + shape = [2, 3, 4] + + sparse_ids = sparse_tensor_lib.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(ids, dtypes.int64), + constant_op.constant(shape, dtypes.int64)) + + sparse_weights = sparse_tensor_lib.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(weights, dtypes.float32), + constant_op.constant(shape, dtypes.int64)) + + return sparse_ids, sparse_weights + + def test_safe_embedding_lookup_sparse_return_zero_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(1.0 * embedding_weights[0][0] + 2.0 * 
embedding_weights[0][1]) / + 3.0, [0] * 4, [0] * 4, embedding_weights[0][2], [0] * 4]) + + def test_safe_embedding_lookup_sparse_return_special_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / + 3.0, embedding_weights[0][3], embedding_weights[0][3], + embedding_weights[0][2], embedding_weights[0][3]]) + + def test_safe_embedding_lookup_sparse_no_weights(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, _ = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, + [0] * 4, embedding_weights[0][2], ( + embedding_weights[0][0] + embedding_weights[0][1]) / 2.0]) + + def test_safe_embedding_lookup_sparse_partitioned(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, _ = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + embedding_weights = list(itertools.chain(*embedding_weights)) + self.assertAllClose(embedding_lookup_result, + [(embedding_weights[0] + embedding_weights[1]) / 2.0, + [0] * 4, [0] * 4, embedding_weights[2], + (embedding_weights[0] + embedding_weights[1]) / 2.0]) + + def test_safe_embedding_lookup_sparse_partitioned_inconsistent_weights(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + 
embedding_weights[1] = embedding_weights[1].astype(np.float64) + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids) + embedding_weights = [ + constant_op.constant(w, dtype=dtypes.float64) + for w in embedding_weights + ] + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids, sparse_weights) + + def test_safe_embedding_lookup_sparse_3d_return_zero_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights).eval()) + + self.assertAllClose(embedding_lookup_result, [[ + (1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / 3.0, + [0] * 4, [0] * 4 + ], [embedding_weights[0][2], [0] * 4, [0] * 4]]) + + def test_safe_embedding_lookup_sparse_3d_return_special_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) + + self.assertAllClose( + embedding_lookup_result, + [[(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / + 3.0, embedding_weights[0][3], embedding_weights[0][3]], [ + embedding_weights[0][2], embedding_weights[0][3], + embedding_weights[0][3] + ]]) + + def test_safe_embedding_lookup_sparse_3d_no_weights(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, _ = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + self.assertAllClose(embedding_lookup_result, [[( + embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, [ + 0 + ] * 4], [ + 
embedding_weights[0][2], + (embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4 + ]]) + + def test_safe_embedding_lookup_sparse_3d_partitioned(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, _ = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + embedding_weights = list(itertools.chain(*embedding_weights)) + self.assertAllClose(embedding_lookup_result, [[ + (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4, [0] * 4 + ], [ + embedding_weights[2], + (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4 + ]]) + + def test_safe_embedding_lookup_sparse_3d_partitioned_inconsistent_weights( + self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_weights[1] = embedding_weights[1].astype(np.float64) + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids) + embedding_weights = [ + constant_op.constant(w, dtype=dtypes.float64) + for w in embedding_weights + ] + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids, sparse_weights) + + class ScatteredEmbeddingLookupTest(test.TestCase): def setUp(self): -- GitLab From 1c1b4d47707a439c157b5dcf3755e391730a328c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 26 Apr 2018 07:01:21 +0800 Subject: [PATCH 0010/2038] Revert "CLN: delete duplicate codes" This reverts commit 067c85fb66345e61aee9428cd645cca786ed2bf4. 
--- .../layers/python/layers/embedding_ops.py | 117 ++++++++++++++++-- .../python/layers/embedding_ops_test.py | 4 +- 2 files changed, 110 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index 4353bf9c28..49c3faf3b7 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -19,12 +19,14 @@ from __future__ import print_function from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.contrib.framework.python.framework import tensor_util as contrib_tensor_util from tensorflow.contrib.layers.python.ops import sparse_feature_cross_op from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops @@ -98,15 +100,112 @@ def safe_embedding_lookup_sparse(embedding_weights, logging.warn("The default value of combiner will change from \"mean\" " "to \"sqrtn\" after 2016/11/01.") combiner = "mean" - return embedding_ops.safe_embedding_lookup_sparse( - embedding_weights=embedding_weights, - sparse_ids=sparse_ids, - sparse_weights=sparse_weights, - combiner=combiner, - default_id=default_id, - name=name, - partition_strategy=partition_strategy, - max_norm=max_norm) + if embedding_weights is None: + raise ValueError("Missing embedding_weights %s." % embedding_weights) + if isinstance(embedding_weights, variables.PartitionedVariable): + embedding_weights = list(embedding_weights) # get underlying Variables. 
+ if not isinstance(embedding_weights, list): + embedding_weights = [embedding_weights] + if len(embedding_weights) < 1: + raise ValueError("Missing embedding_weights %s." % embedding_weights) + + dtype = sparse_weights.dtype if sparse_weights is not None else None + if isinstance(embedding_weights, variables.PartitionedVariable): + embedding_weights = list(embedding_weights) + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] + + contrib_tensor_util.assert_same_float_dtype(embedding_weights + + [sparse_weights]) + + with ops.name_scope(name, "embedding_lookup", + embedding_weights + [sparse_ids, + sparse_weights]) as scope: + # Reshape higher-rank sparse ids and weights to linear segment ids. + original_shape = sparse_ids.dense_shape + original_rank_dim = sparse_ids.dense_shape.get_shape()[0] + original_rank = ( + array_ops.size(original_shape) + if original_rank_dim.value is None + else original_rank_dim.value) + sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [ + math_ops.reduce_prod( + array_ops.slice(original_shape, [0], [original_rank - 1])), + array_ops.gather(original_shape, original_rank - 1)]) + if sparse_weights is not None: + sparse_weights = sparse_tensor.SparseTensor( + sparse_ids.indices, + sparse_weights.values, sparse_ids.dense_shape) + + # Prune invalid ids and weights. + sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) + if combiner != "sum": + sparse_ids, sparse_weights = _prune_invalid_weights( + sparse_ids, sparse_weights) + + # Fill in dummy values for empty features, if necessary. 
+ sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids, + default_id or + 0) + if sparse_weights is not None: + sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0) + + result = embedding_ops.embedding_lookup_sparse( + embedding_weights, + sparse_ids, + sparse_weights, + combiner=combiner, + partition_strategy=partition_strategy, + name=None if default_id is None else scope, + max_norm=max_norm) + + if default_id is None: + # Broadcast is_row_empty to the same shape as embedding_lookup_result, + # for use in Select. + is_row_empty = array_ops.tile( + array_ops.reshape(is_row_empty, [-1, 1]), + array_ops.stack([1, array_ops.shape(result)[1]])) + + result = array_ops.where(is_row_empty, + array_ops.zeros_like(result), + result, + name=scope) + + # Reshape from linear ids back into higher-dimensional dense result. + final_result = array_ops.reshape( + result, + array_ops.concat([ + array_ops.slice( + math_ops.cast(original_shape, dtypes.int32), [0], + [original_rank - 1]), + array_ops.slice(array_ops.shape(result), [1], [-1]) + ], 0)) + final_result.set_shape(tensor_shape.unknown_shape( + (original_rank_dim - 1).value).concatenate(result.get_shape()[1:])) + return final_result + + +def _prune_invalid_ids(sparse_ids, sparse_weights): + """Prune invalid IDs (< 0) from the input ids and weights.""" + is_id_valid = math_ops.greater_equal(sparse_ids.values, 0) + if sparse_weights is not None: + is_id_valid = math_ops.logical_and( + is_id_valid, + array_ops.ones_like(sparse_weights.values, dtype=dtypes.bool)) + sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid) + if sparse_weights is not None: + sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid) + return sparse_ids, sparse_weights + + +def _prune_invalid_weights(sparse_ids, sparse_weights): + """Prune invalid weights (<= 0) from the input ids and weights.""" + if sparse_weights is not None: + is_weights_valid =
math_ops.greater(sparse_weights.values, 0) + sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_weights_valid) + sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_weights_valid) + return sparse_ids, sparse_weights def scattered_embedding_lookup(params, diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index 4d9849b4b1..87f00f94a6 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -168,7 +168,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, sparse_weights = self._ids_and_weights_2d() embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, embedding_weights, sparse_ids) embedding_weights = [ constant_op.constant(w, dtype=dtypes.float64) @@ -245,7 +245,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, sparse_weights = self._ids_and_weights_3d() embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, embedding_weights, sparse_ids) embedding_weights = [ constant_op.constant(w, dtype=dtypes.float64) -- GitLab From f2a0bc58db70cc792649672b81317288c4151ebb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 26 Apr 2018 20:52:03 +0800 Subject: [PATCH 0011/2038] BLD: update golden --- tensorflow/tools/api/golden/tensorflow.nn.pbtxt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt index 455590d866..d9e5b0d0fc 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt @@ -260,6 +260,10 @@ tf_module { name: "relu_layer" argspec: "args=[\'x\', \'weights\', \'biases\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "safe_embedding_lookup_sparse" + argspec: "args=[\'embedding_weights\', \'sparse_ids\', \'sparse_weights\', \'combiner\', \'default_id\', \'name\', \'partition_strategy\', \'max_norm\'], varargs=None, keywords=None, defaults=[\'None\', \'mean\', \'None\', \'None\', \'div\', \'None\'], " + } member_method { name: "sampled_softmax_loss" argspec: "args=[\'weights\', \'biases\', \'labels\', \'inputs\', \'num_sampled\', \'num_classes\', \'num_true\', \'sampled_values\', \'remove_accidental_hits\', \'partition_strategy\', \'name\', \'seed\'], varargs=None, keywords=None, defaults=[\'1\', \'None\', \'True\', \'mod\', \'sampled_softmax_loss\', \'None\'], " -- GitLab From ad4cb2d268bd90c81525418d03ebb31aae3d80f3 Mon Sep 17 00:00:00 2001 From: tucan9389 Date: Wed, 9 May 2018 00:32:19 +0900 Subject: [PATCH 0012/2038] Just fix little issue that is model_file_type variable but not using it. 
--- .../lite/examples/ios/camera/CameraExampleViewController.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm index d74e275f04..30fee64a6f 100644 --- a/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm +++ b/tensorflow/contrib/lite/examples/ios/camera/CameraExampleViewController.mm @@ -315,7 +315,7 @@ static void GetTopN(const uint8_t* prediction, const int prediction_size, const labelLayers = [[NSMutableArray alloc] init]; oldPredictionValues = [[NSMutableDictionary alloc] init]; - NSString* graph_path = FilePathForResourceName(model_file_name, @"tflite"); + NSString* graph_path = FilePathForResourceName(model_file_name, model_file_type); model = tflite::FlatBufferModel::BuildFromFile([graph_path UTF8String]); if (!model) { LOG(FATAL) << "Failed to mmap model " << graph_path; -- GitLab From 5eb246cb79e37b6a7006b6dead99219ffd25de31 Mon Sep 17 00:00:00 2001 From: DavidNorman Date: Wed, 16 May 2018 17:05:24 +0100 Subject: [PATCH 0013/2038] Don't do int64 tests for devices which do not support int64 --- tensorflow/compiler/tests/binary_ops_test.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 1e4dd32916..64eeed8312 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -686,11 +686,12 @@ class BinaryOpsTest(XLATestCase): np.array([[10], [7], [2]], dtype=np.float32), np.float32(7), expected=np.array([[False], [False], [True]], dtype=np.bool)) - self._testBinary( - less_op, - np.array([[10], [7], [2], [-1]], dtype=np.int64), - np.int64(7), - expected=np.array([[False], [False], [True], [True]], dtype=np.bool)) + if np.int64 in self.numeric_types: + self._testBinary( + less_op, + 
np.array([[10], [7], [2], [-1]], dtype=np.int64), + np.int64(7), + expected=np.array([[False], [False], [True], [True]], dtype=np.bool)) for less_equal_op in [math_ops.less_equal, (lambda x, y: x <= y)]: self._testBinary( -- GitLab From 05e4d01dd5db5125969b29405bbf9c4eea4a0cd3 Mon Sep 17 00:00:00 2001 From: nrstott Date: Fri, 18 May 2018 11:15:21 -0400 Subject: [PATCH 0014/2038] accept pd.DataFrame as y for pandas_input_fn --- .../python/estimator/inputs/pandas_io.py | 15 ++++++++--- .../python/estimator/inputs/pandas_io_test.py | 25 +++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index bd06843021..abf3f33519 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -50,7 +50,7 @@ def pandas_input_fn(x, Args: x: pandas `DataFrame` object. - y: pandas `Series` object. `None` if absent. + y: pandas `Series` object or `DataFrame`. `None` if absent. batch_size: int, size of batches to return. num_epochs: int, number of epochs to iterate over data. If not `None`, read attempts that would exceed this value will raise `OutOfRangeError`. @@ -87,7 +87,13 @@ def pandas_input_fn(x, if not np.array_equal(x.index, y.index): raise ValueError('Index for x and y are mismatched.\nIndex for x: %s\n' 'Index for y: %s\n' % (x.index, y.index)) - x[target_column] = y + if isinstance(y, pd.DataFrame): + target_column = list(y) + print(target_column) + x[target_column] = y + print(x) + else: + x[target_column] = y # TODO(mdan): These are memory copies. We probably don't need 4x slack space. # The sizes below are consistent with what I've seen elsewhere. 
@@ -117,7 +123,10 @@ def pandas_input_fn(x, features = features[1:] features = dict(zip(list(x.columns), features)) if y is not None: - target = features.pop(target_column) + if isinstance(target_column, list): + target = {column: features.pop(column) for column in target_column} + else: + target = features.pop(target_column) return features, target return features return input_fn diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py index e5912a3b28..f4970f07b3 100644 --- a/tensorflow/python/estimator/inputs/pandas_io_test.py +++ b/tensorflow/python/estimator/inputs/pandas_io_test.py @@ -47,6 +47,16 @@ class PandasIoTest(test.TestCase): y = pd.Series(np.arange(-32, -28), index=index) return x, y + def makeTestDataFrameWithYAsDataFrame(self): + index = np.arange(100, 104) + a = np.arange(4) + b = np.arange(32, 36) + a_label = np.arange(10, 14) + b_label = np.arange(50, 54) + x = pd.DataFrame({'a': a, 'b': b}, index=index) + y = pd.DataFrame({'a_target': a_label, 'b_target': b_label}, index=index) + return x, y + def callInputFnOnce(self, input_fn, session): results = input_fn() coord = coordinator.Coordinator() @@ -89,6 +99,21 @@ class PandasIoTest(test.TestCase): self.assertAllEqual(features['b'], [32, 33]) self.assertAllEqual(target, [-32, -31]) + def testPandasInputFnWhenYIsDataFrame_ProducesExpectedOutput(self): + if not HAS_PANDAS: + return + with self.test_session() as session: + x, y = self.makeTestDataFrameWithYAsDataFrame() + input_fn = pandas_io.pandas_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + + features, targets = self.callInputFnOnce(input_fn, session) + + self.assertAllEqual(features['a'], [0, 1]) + self.assertAllEqual(features['b'], [32, 33]) + self.assertAllEqual(targets['a_target'], [10, 11]) + self.assertAllEqual(targets['b_target'], [50, 51]) + def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self): if not HAS_PANDAS: return -- GitLab From 
45fb10adbde00a82af4576e0de01a5012b0b1ad8 Mon Sep 17 00:00:00 2001 From: nrstott Date: Fri, 18 May 2018 12:01:03 -0400 Subject: [PATCH 0015/2038] handle overlapping columns in pandas_input_fn when y is df --- .../python/estimator/inputs/pandas_io.py | 25 ++++++++++++++++--- .../python/estimator/inputs/pandas_io_test.py | 16 ++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index abf3f33519..6918683ce7 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -35,6 +35,22 @@ except ImportError: HAS_PANDAS = False +def _get_unique_target_key(features, target_column_name): + """Returns a key that does not exist in the input DataFrame `features`. + + Args: + features: DataFrame + target_column_name: Name of the target column as a `str` + + Returns: + A unique key that can be used to insert the target into + features. 
+ """ + while target_column_name in features: + target_column_name += '_n' + return target_column_name + + @tf_export('estimator.inputs.pandas_input_fn') def pandas_input_fn(x, y=None, @@ -88,10 +104,9 @@ def pandas_input_fn(x, raise ValueError('Index for x and y are mismatched.\nIndex for x: %s\n' 'Index for y: %s\n' % (x.index, y.index)) if isinstance(y, pd.DataFrame): - target_column = list(y) - print(target_column) + y_columns = [(column, _get_unique_target_key(x, column)) for column in list(y)] + target_column = [v for _, v in y_columns] x[target_column] = y - print(x) else: x[target_column] = y @@ -124,7 +139,9 @@ def pandas_input_fn(x, features = dict(zip(list(x.columns), features)) if y is not None: if isinstance(target_column, list): - target = {column: features.pop(column) for column in target_column} + keys = [k for k, _ in y_columns] + values = [features.pop(column) for column in target_column] + target = {k: v for k, v in zip(keys, values)} else: target = features.pop(target_column) return features, target diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py index f4970f07b3..f8546abb8a 100644 --- a/tensorflow/python/estimator/inputs/pandas_io_test.py +++ b/tensorflow/python/estimator/inputs/pandas_io_test.py @@ -114,6 +114,22 @@ class PandasIoTest(test.TestCase): self.assertAllEqual(targets['a_target'], [10, 11]) self.assertAllEqual(targets['b_target'], [50, 51]) + def testPandasInputFnWhenYIsDataFrame_HandlesOverlappingColumnNames(self): + if not HAS_PANDAS: + return + with self.test_session() as session: + x, y = self.makeTestDataFrameWithYAsDataFrame() + y = y.rename(columns={'a_target': 'a', 'b_target': 'b'}) + input_fn = pandas_io.pandas_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + + features, targets = self.callInputFnOnce(input_fn, session) + + self.assertAllEqual(features['a'], [0, 1]) + self.assertAllEqual(features['b'], [32, 33]) + 
self.assertAllEqual(targets['a'], [10, 11]) + self.assertAllEqual(targets['b'], [50, 51]) + def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self): if not HAS_PANDAS: return -- GitLab From a611e3afdf1dad0ed485af6cf4b8aa94de744511 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Fri, 18 May 2018 11:32:50 -0700 Subject: [PATCH 0016/2038] enhancement with conv2d bwd primitive reuse and related conv fwd refactoring --- .../core/kernels/mkl_conv_grad_filter_ops.cc | 658 ++++++++++++++---- .../core/kernels/mkl_conv_grad_input_ops.cc | 474 +++++++++++-- tensorflow/core/kernels/mkl_conv_ops.cc | 280 +++++--- tensorflow/core/kernels/mkl_conv_ops.h | 222 +----- tensorflow/core/util/mkl_util.h | 32 +- 5 files changed, 1109 insertions(+), 557 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index e0706568b1..d12bccc02a 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -52,9 +52,310 @@ using mkldnn::stream; #endif namespace tensorflow { - typedef Eigen::ThreadPoolDevice CPUDevice; +#ifndef INTEL_MKL_ML + +struct MklConvBwdFilterParams { + memory::dims src_dims; + memory::dims diff_filter_dims; + memory::dims diff_bias_dims; + memory::dims diff_dst_dims; + memory::dims strides; + memory::dims dilations; + memory::dims padding_left; + memory::dims padding_right; + padding_kind padding; + + MklConvBwdFilterParams(memory::dims src_dims, + memory::dims diff_filter_dims, memory::dims diff_bias_dims, + memory::dims diff_dst_dims, memory::dims strides, + memory::dims dilations, memory::dims padding_left, + memory::dims padding_right, padding_kind padding) : + src_dims(src_dims), diff_filter_dims(diff_filter_dims), + diff_bias_dims(diff_bias_dims), diff_dst_dims(diff_dst_dims), + strides(strides), dilations(dilations), + padding_left(padding_left), padding_right(padding_right), + padding(padding) { + } +}; + 
+template +class MklConv2DBwdFilterPrimitive : public MklPrimitive { + public: + explicit MklConv2DBwdFilterPrimitive( + const MklConvBwdFilterParams& convBwdFilterDims) { + context_.bwd_filter_stream.reset(new stream(stream::kind::eager)); + // create conv primitive + if (context_.conv_bwd_filter == nullptr) { + Setup(convBwdFilterDims); + } + } + + ~MklConv2DBwdFilterPrimitive() {} + + // Convolution backward weights with bias + // src_data: input data buffer of src + // diff_filter_data: output data buffer of diff_filter + // diff_bias_data: output data buffer of diff_bias + // diff_dst_data: input data buffer of diff_dst + void Execute(const T* src_data, const T* diff_filter_data, + const T* diff_bias_data, const T* diff_dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.diff_filter_mem->set_data_handle( + static_cast(const_cast(diff_filter_data))); + context_.diff_bias_mem->set_data_handle( + static_cast(const_cast(diff_bias_data))); + context_.diff_dst_mem->set_data_handle( + static_cast(const_cast(diff_dst_data))); + + context_.bwd_filter_stream->submit(context_.bwd_filter_primitives); + + context_.src_mem->set_data_handle(DummyData); + context_.diff_filter_mem->set_data_handle(DummyData); + context_.diff_bias_mem->set_data_handle(DummyData); + context_.diff_dst_mem->set_data_handle(DummyData); + return; + } + + // Convolution backward weights without bias + // src_data: input data buffer of src + // diff_filter_data: output data buffer of diff_filter + // diff_dst_data: input data buffer of diff_dst + void Execute(const T* src_data, + const T* diff_filter_data, const T* diff_dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.diff_filter_mem->set_data_handle( + static_cast(const_cast(diff_filter_data))); + context_.diff_dst_mem->set_data_handle( + static_cast(const_cast(diff_dst_data))); + + context_.bwd_filter_stream->submit(context_.bwd_filter_primitives); + + 
context_.src_mem->set_data_handle(DummyData); + context_.diff_filter_mem->set_data_handle(DummyData); + context_.diff_dst_mem->set_data_handle(DummyData); + return; + } + + memory::format GetSrcMemoryFormat() const { + return context_.src_fmt; + } + + memory::format GetDiffDstMemoryFormat() const { + return context_.diff_dst_fmt; + } + + memory::format GetDiffFilterMemoryFormat() const { + return context_.diff_filter_fmt; + } + + // primitive desc of the convolution bwd filter primitive + std::shared_ptr + GetPrimitiveDesc() const { + return context_.bwd_filter_pd; + } + + private: + // Primitive reuse context for Conv2D bwd filter op + struct ConvBwdFilterContext { + // expected memory format for this primitive instance + memory::format src_fmt; + memory::format diff_dst_fmt; + memory::format diff_filter_fmt; + + // convolution bwd filter (weights) primitive + std::shared_ptr + bwd_filter_pd; + std::shared_ptr conv_bwd_filter; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr diff_filter_mem; + std::shared_ptr diff_bias_mem; + std::shared_ptr diff_dst_mem; + + // desc & primitive desc + std::shared_ptr bwd_filter_desc; + std::shared_ptr fwd_desc; + std::shared_ptr fwd_pd; + + // memory desc: forward & backward can share same memory desc + std::shared_ptr src_md; + std::shared_ptr diff_filter_md; + std::shared_ptr diff_bias_md; + std::shared_ptr diff_dst_md; + + // MKL pipeline + std::shared_ptr bwd_filter_stream; + std::vector bwd_filter_primitives; + + ConvBwdFilterContext() : + src_fmt(memory::format::any), + diff_dst_fmt(memory::format::any), + diff_filter_fmt(memory::format::any), + src_mem(nullptr), diff_filter_mem(nullptr), + diff_bias_mem(nullptr), diff_dst_mem(nullptr), + bwd_filter_desc(nullptr), fwd_desc(nullptr), fwd_pd(nullptr), + src_md(nullptr), diff_filter_md(nullptr), + diff_bias_md(nullptr), diff_dst_md(nullptr), + bwd_filter_stream(nullptr) { + } + } context_; + + engine cpu_engine_ = engine(engine::cpu, 0); + + // Setup Conv2d backward filter (weights) primitives.
+ void Setup(const MklConvBwdFilterParams& convBwdFilterDims) { + // create memory descriptors for convolution data w/ no specified format + context_.src_md.reset(new memory::desc({convBwdFilterDims.src_dims}, + MklDnnType(), memory::format::any)); + + context_.diff_dst_md.reset(new memory::desc( + {convBwdFilterDims.diff_dst_dims}, + MklDnnType(), memory::format::any)); + + context_.diff_filter_md.reset(new memory::desc( + {convBwdFilterDims.diff_filter_dims}, + MklDnnType(), memory::format::any)); + + if (!convBwdFilterDims.diff_bias_dims.empty()) + context_.diff_bias_md.reset(new memory::desc( + {convBwdFilterDims.diff_bias_dims}, + MklDnnType(), memory::format::x)); + + // create a convolution + if (!convBwdFilterDims.diff_bias_dims.empty()) { + context_.bwd_filter_desc.reset(new convolution_backward_weights::desc( + convolution_direct, *context_.src_md, *context_.diff_filter_md, + *context_.diff_bias_md, *context_.diff_dst_md, + convBwdFilterDims.strides, convBwdFilterDims.dilations, + convBwdFilterDims.padding_left, convBwdFilterDims.padding_right, + convBwdFilterDims.padding)); + } else { + context_.bwd_filter_desc.reset( + new convolution_backward_weights::desc( + convolution_direct, *context_.src_md, *context_.diff_filter_md, + *context_.diff_dst_md, convBwdFilterDims.strides, + convBwdFilterDims.dilations, convBwdFilterDims.padding_left, + convBwdFilterDims.padding_right, convBwdFilterDims.padding)); + } + + // create fwd primitive_desc + context_.fwd_desc.reset(new convolution_forward::desc( + prop_kind::forward, convolution_direct, + *context_.src_md, *context_.diff_filter_md, *context_.diff_dst_md, + convBwdFilterDims.strides, + convBwdFilterDims.dilations, convBwdFilterDims.padding_left, + convBwdFilterDims.padding_right, convBwdFilterDims.padding)); + context_.fwd_pd.reset(new convolution_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); + + // create backward conv primitive_desc + context_.bwd_filter_pd.reset( + new 
convolution_backward_weights::primitive_desc( + *context_.bwd_filter_desc, cpu_engine_, *context_.fwd_pd)); + + // store the expected memory format + auto bwd_filter_pd = context_.bwd_filter_pd.get(); + context_.src_fmt = static_cast( + bwd_filter_pd->src_primitive_desc().desc().data.format); + context_.diff_filter_fmt = static_cast( + bwd_filter_pd->diff_weights_primitive_desc().desc().data.format); + context_.diff_dst_fmt = static_cast( + bwd_filter_pd->diff_dst_primitive_desc().desc().data.format); + + // create memory primitive based on dummy data + context_.src_mem.reset(new memory( + bwd_filter_pd->src_primitive_desc(), DummyData)); + context_.diff_filter_mem.reset(new memory( + bwd_filter_pd->diff_weights_primitive_desc(), DummyData)); + context_.diff_dst_mem.reset(new memory( + bwd_filter_pd->diff_dst_primitive_desc(), DummyData)); + + // create convolution primitive and add it to net + if (!convBwdFilterDims.diff_bias_dims.empty()) { + context_.diff_bias_mem.reset(new memory( + {{{convBwdFilterDims.diff_bias_dims}, MklDnnType(), + memory::format::x}, cpu_engine_}, DummyData)); + context_.conv_bwd_filter.reset(new convolution_backward_weights( + *context_.bwd_filter_pd, *context_.src_mem, *context_.diff_dst_mem, + *context_.diff_filter_mem, *context_.diff_bias_mem)); + } else { + context_.conv_bwd_filter.reset(new convolution_backward_weights( + *context_.bwd_filter_pd, *context_.src_mem, + *context_.diff_dst_mem, *context_.diff_filter_mem)); + } + + context_.bwd_filter_primitives.push_back(*context_.conv_bwd_filter); + return; + } +}; + +template +class MklConv2DBwdFilterPrimitiveFactory : public MklPrimitiveFactory { + public: + static MklConv2DBwdFilterPrimitive* Get( + const MklConvBwdFilterParams& convBwdFilterDims) { + MklConv2DBwdFilterPrimitive* conv2d_bwd_filter = nullptr; + + // look into the pool for reusable primitive + conv2d_bwd_filter = dynamic_cast*> ( + MklConv2DBwdFilterPrimitiveFactory::GetInstance().GetConv2dBwdFilter( + 
convBwdFilterDims)); + + if (conv2d_bwd_filter == nullptr) { + conv2d_bwd_filter = new MklConv2DBwdFilterPrimitive( + convBwdFilterDims); + MklConv2DBwdFilterPrimitiveFactory::GetInstance().SetConv2dBwdFilter( + convBwdFilterDims, conv2d_bwd_filter); + } + return conv2d_bwd_filter; + } + + + private: + MklConv2DBwdFilterPrimitiveFactory() {} + ~MklConv2DBwdFilterPrimitiveFactory() {} + + static MklConv2DBwdFilterPrimitiveFactory& GetInstance() { + static MklConv2DBwdFilterPrimitiveFactory instance_; + return instance_; + } + + static std::string CreateKey( + const MklConvBwdFilterParams& convBwdFilterDims) { + std::string prefix = "conv2d_bwd_filter"; + FactoryKeyCreator key_creator; + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(convBwdFilterDims.src_dims); + key_creator.AddAsKey(convBwdFilterDims.diff_filter_dims); + key_creator.AddAsKey(convBwdFilterDims.diff_bias_dims); + key_creator.AddAsKey(convBwdFilterDims.diff_dst_dims); + key_creator.AddAsKey(convBwdFilterDims.strides); + key_creator.AddAsKey(convBwdFilterDims.dilations); + key_creator.AddAsKey(convBwdFilterDims.padding_left); + key_creator.AddAsKey(convBwdFilterDims.padding_right); + return key_creator.GetKey(); + } + + MklPrimitive* GetConv2dBwdFilter( + const MklConvBwdFilterParams& convBwdFilterDims) { + std::string key = CreateKey(convBwdFilterDims); + return this->GetOp(key); + } + + void SetConv2dBwdFilter( + const MklConvBwdFilterParams& convBwdFilterDims, MklPrimitive* op) { + std::string key = CreateKey(convBwdFilterDims); + this->SetOp(key, op); + } +}; + +#endif + #ifdef INTEL_MKL_ML template @@ -440,11 +741,213 @@ class MklConv2DCustomBackpropFilterOp : public MklConv2DBackpropCommonOp { public: explicit MklConv2DCustomBackpropFilterOp(OpKernelConstruction* context) - : MklConv2DBackpropCommonOp(context) {} + : MklConv2DBackpropCommonOp(context) { + } + ~MklConv2DCustomBackpropFilterOp() {} + void Compute(OpKernelContext* context) { + try { + MklDnnData src(&cpu_engine_); + MklDnnData 
diff_dst(&cpu_engine_); + MklDnnData diff_filter(&cpu_engine_); // output + + // Input tensors + const int kInputIdx = 0, kFilterIdx = 1, kOutbpropIdx = 2; + const Tensor& src_tensor = MklGetInput(context, kInputIdx); + const Tensor& filter_tensor = MklGetInput(context, kFilterIdx); + const Tensor& diff_dst_tensor = MklGetInput(context, kOutbpropIdx); + + MklDnnShape src_mkl_shape, filter_mkl_shape, diff_dst_mkl_shape; + GetMklShape(context, kInputIdx, &src_mkl_shape); + GetMklShape(context, kFilterIdx, &filter_mkl_shape); + GetMklShape(context, kOutbpropIdx, &diff_dst_mkl_shape); + // Allow operator-specific sanity checking of shapes. + ValidateMklShapes(src_mkl_shape, filter_mkl_shape, diff_dst_mkl_shape); + + // Allow operator-specific generation of shapes. + // E.g., Conv2DBackpropFilter gets filter as filter_sizes. It is a + // tensor containing shape of filter. So filter.shape() is not + // a correct way to get filter shape. These operator-specific calls + // allow this class to handle this case. + TensorShape src_tf_shape = MakeInputTfShape(context, src_tensor); + TensorShape filter_tf_shape = MakeFilterTfShape(context, filter_tensor); + TensorShape diff_dst_tf_shape = GetTfShape(context, kOutbpropIdx); + + // Corner cases: output with 0 elements and 0 batch size. + Tensor* diff_filter_tensor = nullptr; + if (src_tf_shape.num_elements() == 0 || + filter_tf_shape.num_elements() == 0 || + diff_dst_tf_shape.num_elements() == 0) { + MklDnnShape diff_filter_mkl_shape; + diff_filter_mkl_shape.SetMklTensor(false); + TensorShape diff_filter_tf_shape = GetOutputTfShape( + src_tf_shape, filter_tf_shape, diff_dst_tf_shape); + const int kOutputIdx = 0; + AllocateOutputSetMklShape(context, kOutputIdx, &diff_filter_tensor, + diff_filter_tf_shape, diff_filter_mkl_shape); + CHECK_NOTNULL(diff_filter_tensor); + + // if output tensor has more than 0 elements, we need to 0 them out. 
+ auto diff_filter_data = diff_filter_tensor->flat().data(); + for (size_t i = 0; i < diff_filter_tf_shape.num_elements(); ++i) { + diff_filter_data[i] = 0; + } + return; + } + + // By default, all dims are in MKL order. Only dims in TF order + // are those with prefix tf_order. + memory::dims diff_dst_dims, fwd_src_dims, fwd_filter_dims; + memory::dims padding_left, padding_right, dilations, + strides, fwd_dst_dims; + memory::dims fwd_dst_dims_tf_order; + + // Get forward convolution parameters. + MklDnnConvUtil conv_utl(context, this->strides_, this->padding_, + this->data_format_, this->dilations_); + conv_utl.GetConvFwdSizesInMklOrder( + src_tf_shape, filter_tf_shape, &fwd_src_dims, &fwd_filter_dims, + &strides, &dilations, &fwd_dst_dims_tf_order, + &fwd_dst_dims, &padding_left, &padding_right); + if (!context->status().ok()) return; + + auto tf_fmt = TFDataFormatToMklDnnDataFormat(this->data_format_); + auto fwd_src_md = + src_mkl_shape.IsMklTensor() + ? src_mkl_shape.GetMklLayout() + : memory::desc(fwd_src_dims, MklDnnType(), tf_fmt); + + conv_utl.GetInputSizeInMklOrder(diff_dst_tf_shape, &diff_dst_dims); + if (!context->status().ok()) return; + + auto diff_dst_md = diff_dst_mkl_shape.IsMklTensor() + ? diff_dst_mkl_shape.GetMklLayout() + : memory::desc(diff_dst_dims, + MklDnnType(), tf_fmt); + + memory::dims diff_bias_dims = {}; + int64 depth = 0; + if (biasEnabled) { + TensorShape obp_tf_shape = GetTfShape(context, 2); + depth = (this->data_format_ == FORMAT_NCHW) + ? 
obp_tf_shape.dim_size(1) + : obp_tf_shape.dim_size(3); + diff_bias_dims = {static_cast(depth)}; + } + + dilations[kDilationH] -= 1; + dilations[kDilationW] -= 1; + + MklConv2DBwdFilterPrimitive *conv2d_bwd_filter = nullptr; + MklConvBwdFilterParams convBwdFilterDims(fwd_src_dims, fwd_filter_dims, + diff_bias_dims, diff_dst_dims, strides, dilations, padding_left, + padding_right, TFPaddingToMklDnnPadding(this->padding_)); + conv2d_bwd_filter = MklConv2DBwdFilterPrimitiveFactory::Get( + convBwdFilterDims); + auto bwd_filter_pd = conv2d_bwd_filter->GetPrimitiveDesc(); + + // allocate output tensors: diff_fitler and diff_bias (w bias) + auto bwd_output_dims = GetOutputDims(fwd_src_dims, fwd_filter_dims); + + // diff_filter + MklDnnShape diff_filter_mkl_shape; + diff_filter_mkl_shape.SetMklTensor(false); + // output_dims_mkl_order is in OIHW format. + TensorShape diff_filter_tf_shape( + {bwd_output_dims[MklDnnDims::Dim_H], + bwd_output_dims[MklDnnDims::Dim_W], + bwd_output_dims[MklDnnDims::Dim_I], + bwd_output_dims[MklDnnDims::Dim_O]}); + AllocateOutputSetMklShape(context, 0, &diff_filter_tensor, + diff_filter_tf_shape, diff_filter_mkl_shape); + + Tensor* diff_bias_tensor = nullptr; + if (biasEnabled) { + TensorShape diff_bias_shape({depth}); + AllocateBiasGradTensor(context, diff_bias_shape, &diff_bias_tensor); + } + + // check if src and diff_dst need reorder + std::vector net; + T *src_data = nullptr; + if (fwd_src_md.data.format != conv2d_bwd_filter->GetSrcMemoryFormat()) { + src.SetUsrMem(fwd_src_md, &src_tensor); + src.CheckReorderToOpMem( + bwd_filter_pd->src_primitive_desc(), &net); + src_data = static_cast(src.GetOpMem().get_data_handle()); + } else { + src_data = static_cast(const_cast( + src_tensor.flat().data())); + } + + T *diff_dst_data = nullptr; + if (diff_dst_md.data.format != + conv2d_bwd_filter->GetDiffDstMemoryFormat()) { + diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); + diff_dst.CheckReorderToOpMem( + bwd_filter_pd->diff_dst_primitive_desc(), 
&net); + diff_dst_data = static_cast( + diff_dst.GetOpMem().get_data_handle()); + } else { + diff_dst_data = static_cast(const_cast( + diff_dst_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); + + // For backward filter, convert diff_filter back to Tensorflow layout + // Here we prepare to reorder op memory back to user memory + bool diff_filter_reorder_required = false; + T *diff_filter_data = nullptr; + if (GetOutputFormat(tf_fmt) != + conv2d_bwd_filter->GetDiffFilterMemoryFormat()) { + // Allocate diff filter tensor as Tensorflow layout + diff_filter.SetUsrMem(bwd_output_dims, GetOutputFormat(tf_fmt), + diff_filter_tensor); + diff_filter_reorder_required = true; + diff_filter.PrepareReorderToUserMemIfReq( + bwd_filter_pd->diff_weights_primitive_desc()); + diff_filter_data = static_cast( + diff_filter.GetOpMem().get_data_handle()); + } else { + diff_filter_data = static_cast(const_cast( + diff_filter_tensor->flat().data())); + } + + // Execute convolution filter bwd + if (biasEnabled) { + T* diff_bias_data = static_cast(const_cast( + diff_bias_tensor->flat().data())); + conv2d_bwd_filter->Execute(src_data, diff_filter_data, + diff_bias_data, diff_dst_data); + } else { + conv2d_bwd_filter->Execute(src_data, diff_filter_data, diff_dst_data); + } + + // Reorder diff_filter back to Tensorflow layout if necessary + if (diff_filter_reorder_required) { + std::vector net; + diff_filter.InsertReorderToUserMem(&net); + stream(stream::kind::eager).submit(net).wait(); + } + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + ", in file " + + string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); + } + } + private: + const int kInputIndex_Filter = 1; + const int kInputIndex_InputSizes = 0; const int kDilationH = 0, kDilationW = 1; + engine cpu_engine_ = engine(engine::cpu, 0); + + 
// Validate input shapes. + // Function asserts that input shapes are valid. void ValidateMklShapes(const MklDnnShape& input_mkl_shape, const MklDnnShape& filter_mkl_shape, const MklDnnShape& obp_mkl_shape) { @@ -452,141 +955,44 @@ class MklConv2DCustomBackpropFilterOp << "Conv2DBackpropFilter: filter should not be in MKL Layout"; } - size_t GetInputTensorIndexWithSizes() { return 1; /* filter index */ } - + // Get TensorFlow shape of input tensor. TensorShape MakeInputTfShape(OpKernelContext* context, const Tensor& input_tensor) { size_t input_idx = 0; return GetTfShape(context, input_idx); } + // Get TensorFlow shape of filter tensor. TensorShape MakeFilterTfShape(OpKernelContext* context, const Tensor& filter_tensor) { TensorShape filter_tf_shape; CHECK_EQ(TensorShapeUtils::IsVector(filter_tensor.shape()), true); CHECK_EQ(TensorShapeUtils::MakeShape(filter_tensor.vec(), - &filter_tf_shape) - .ok(), - true); + &filter_tf_shape).ok(), true); return filter_tf_shape; } + // Get Tensorflow shape of output tensor (diff_filter), + // which is same as shape of filter. TensorShape GetOutputTfShape(const TensorShape& input_shape, const TensorShape& filter_shape, const TensorShape& outbprop_shape) { - // Shape of output of Conv2DBackpropFilter is same as shape of filter. return filter_shape; } + // Get the shape of output (diff_filter) in MKL-DNN order. + // Computes shape of output from input shape (fwd_input_dims) + // and filter shape (fwd_filter_dims). const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims, const memory::dims& fwd_filter_dims) { - // Shape of output of Conv2DBackpropFilter is same as shape of filter. return fwd_filter_dims; } + // Output layout is Tensorflow's filter layout (HWIO). memory::format GetOutputFormat(const memory::format data_format) { - // Output layout is Tensorflow's filter layout (HWIO). 
return memory::format::hwio; } - void CreatePrimitive(OpKernelContext* context, const engine& cpu_engine, - const convolution_forward::primitive_desc& conv_fwd_pd, - MklDnnData* input, MklDnnData* filter, - MklDnnData* outbackprop, MklDnnData* output, - Tensor** output_tensor, - const memory::dims& strides, - const memory::dims& dilations, - const memory::dims& padding_l, - const memory::dims& padding_r, padding_kind padding, - const memory::dims& bwd_output_dims, - memory::format bwd_output_format) { - CHECK_NOTNULL(context); - CHECK_NOTNULL(input); - CHECK_NOTNULL(filter); - CHECK_NOTNULL(outbackprop); - CHECK_NOTNULL(output); - CHECK_NOTNULL(output_tensor); - - MklDnnData* bias_grad = nullptr; - int depth = 0; - if (biasEnabled) { - // Data structure for bias_grad - bias_grad = new MklDnnData(&cpu_engine); - TensorShape obp_tf_shape = GetTfShape(context, 2); - depth = (MklConv2DBackpropCommonOp::GetTFDataFormat() == - FORMAT_NCHW) - ? obp_tf_shape.dim_size(1) - : obp_tf_shape.dim_size(3); - memory::dims bias_grad_dims = {depth}; - bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x); - } - - if (biasEnabled && (bias_grad != nullptr)) { - // Create convolution backward weights with bias primitive. - // Use dilated convolution in case dilate rates are greater than zero. - auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? - convolution_backward_weights::desc(convolution_direct, - input->GetOpMemDesc(), output->GetOpMemDesc(), - bias_grad->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, - dilations, padding_l, padding_r, padding) : - convolution_backward_weights::desc(convolution_direct, - input->GetOpMemDesc(), output->GetOpMemDesc(), - bias_grad->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), - strides, padding_l, padding_r, padding); - auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, - cpu_engine, - conv_fwd_pd); - - // Allocate output tensor. 
- AllocateOutputTensor(context, bwd_pd, bwd_output_dims, - bwd_output_format, output_tensor); - - CHECK_NOTNULL(*output_tensor); - // Set buffer handle using allocated output tensor. - output->SetUsrMemDataHandle(*output_tensor); - - // Allocate bias_grad tensor - TensorShape bias_grad_shape({depth}); - Tensor* bias_grad_tensor = nullptr; - AllocateBiasGradTensor(context, bias_grad_shape, &bias_grad_tensor); - memory::dims bias_grad_dims = {depth}; - // Since Bias is 1D, we use format::x from MKLDNN to represent it. - auto bias_grad_md = - memory::desc({bias_grad_dims}, MklDnnType(), memory::format::x); - bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor); - bias_grad->SetUsrMemDataHandle(bias_grad_tensor); - - PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, - bias_grad); - } else { - // Create convolution backward weights primitive. - // Use dilated convolution in case dilate rates are greater than zero. - auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? - convolution_backward_weights::desc(convolution_direct, - input->GetOpMemDesc(), output->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, - dilations, padding_l, padding_r, padding) : - convolution_backward_weights::desc(convolution_direct, - input->GetOpMemDesc(), output->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), - strides, padding_l, padding_r, padding); - auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, - cpu_engine, - conv_fwd_pd); - - // Allocate output tensor. - AllocateOutputTensor(context, bwd_pd, bwd_output_dims, - bwd_output_format, output_tensor); - - CHECK_NOTNULL(*output_tensor); - // Set buffer handle using allocated output tensor. - output->SetUsrMemDataHandle(*output_tensor); - PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output); - } - } - // Allocate output tensor. 
void AllocateOutputTensor( OpKernelContext* context, @@ -621,40 +1027,8 @@ class MklConv2DCustomBackpropFilterOp MklDnnShape bias_grad_mkl_shape; bias_grad_mkl_shape.SetMklTensor(false); - AllocateOutputSetMklShape(context, 1, bias_grad_tensor, bias_grad_shape, - bias_grad_mkl_shape); - } - - // Prepare and execute net - checks for input and output reorders. - void PrepareAndExecutePrimitive( - const convolution_backward_weights::primitive_desc& conv_pd, - MklDnnData* input, MklDnnData* obp, MklDnnData* output, - MklDnnData* bias_grad = nullptr) { - // Create reorders between user layout and MKL layout if it is needed and - // add it to the net before convolution. - std::vector net; - input->CheckReorderToOpMem(conv_pd.src_primitive_desc(), &net); - obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net); - - // For BackpropFilter, we convert the output tensor back in Tensorflow - // layout. - bool output_reorder_required = output->PrepareReorderToUserMemIfReq( - conv_pd.diff_weights_primitive_desc()); - - if (biasEnabled && (bias_grad != nullptr)) { - net.push_back(convolution_backward_weights( - conv_pd, input->GetOpMem(), obp->GetOpMem(), output->GetOpMem(), - bias_grad->GetOpMem())); - } else { - net.push_back(convolution_backward_weights( - conv_pd, input->GetOpMem(), obp->GetOpMem(), output->GetOpMem())); - } - - if (output_reorder_required) { - output->InsertReorderToUserMem(&net); - } - - stream(stream::kind::eager).submit(net).wait(); + AllocateOutputSetMklShape(context, 1, bias_grad_tensor, + bias_grad_shape, bias_grad_mkl_shape); } }; diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index d203c04934..e4b8564589 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -53,9 +53,244 @@ using mkldnn::stream; #endif namespace tensorflow { - typedef Eigen::ThreadPoolDevice CPUDevice; +#ifndef INTEL_MKL_ML + +/// 
utility classes enabling primitive reuse for backward conv2d ops. +struct MklConvBwdInputParams { + memory::dims diff_src_dims; + memory::dims filter_dims; + memory::dims diff_dst_dims; + memory::dims strides; + memory::dims dilations; + memory::dims padding_left; + memory::dims padding_right; + padding_kind padding; + + MklConvBwdInputParams(memory::dims diff_src_dims, + memory::dims filter_dims, memory::dims diff_dst_dims, + memory::dims strides, memory::dims dilations, + memory::dims padding_left, memory::dims padding_right, + padding_kind padding) : + diff_src_dims(diff_src_dims), filter_dims(filter_dims), + diff_dst_dims(diff_dst_dims), strides(strides), + dilations(dilations), padding_left(padding_left), + padding_right(padding_right), padding(padding) { + } +}; + +template +class MklConv2DBwdInputPrimitive : public MklPrimitive { + public: + explicit MklConv2DBwdInputPrimitive( + const MklConvBwdInputParams& convBwdInputDims) { + context_.bwd_input_stream.reset(new stream(stream::kind::eager)); + + // create conv primitive + if (context_.conv_bwd_input == nullptr) { + Setup(convBwdInputDims); + } + } + ~MklConv2DBwdInputPrimitive() {} + + // Convolution backward filter (weights) + // diff_src_data: output data buffer of diff_src + // filter_data: input data buffer of filter (weights) + // diff_dst_data: input data buffer of dst + // Bias does not matter here + void Execute(const T* diff_src_data, + const T* filter_data, const T* diff_dst_data) { + context_.diff_src_mem->set_data_handle( + static_cast(const_cast(diff_src_data))); + context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + context_.diff_dst_mem->set_data_handle( + static_cast(const_cast(diff_dst_data))); + + context_.bwd_input_stream->submit(context_.bwd_input_primitives); + + // set back data handle + context_.diff_src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + context_.diff_dst_mem->set_data_handle(DummyData); + return; + } + 
+ memory::format GetFilterMemoryFormat() const { + return context_.filter_fmt; + } + + memory::format GetDiffDstMemoryFormat() const { + return context_.diff_dst_fmt; + } + + std::shared_ptr + GetPrimitiveDesc() const { + return context_.bwd_input_pd; + } + + private: + // Primitive reuse context for Conv2D Bwd Input op + struct ConvBwdInputContext { + // expected memory format for this primitive instance + memory::format filter_fmt; + memory::format diff_dst_fmt; + + // MKLDNN memory + std::shared_ptr diff_src_mem; + std::shared_ptr filter_mem; + std::shared_ptr diff_dst_mem; + + // convolution primitive + std::shared_ptr + bwd_input_pd; + std::shared_ptr conv_bwd_input; + + // desc & prmitive desc + std::shared_ptr bwd_input_desc; + std::shared_ptr fwd_desc; + std::shared_ptr fwd_pd; + + // memory desc: forward & backward can share same memory::desc + std::shared_ptr diff_src_md; + std::shared_ptr filter_md; + std::shared_ptr diff_dst_md; + + // MKL pipeline + std::shared_ptr bwd_input_stream; + std::vector bwd_input_primitives; + + ConvBwdInputContext() : + filter_fmt(memory::format::any), diff_dst_fmt(memory::format::any), + diff_src_mem(nullptr), filter_mem(nullptr), diff_dst_mem(nullptr), + bwd_input_pd(nullptr), conv_bwd_input(nullptr), + bwd_input_desc(nullptr), fwd_desc(nullptr), fwd_pd(nullptr), + diff_src_md(nullptr), filter_md(nullptr), diff_dst_md(nullptr), + bwd_input_stream(nullptr) { + } + } context_; + + engine cpu_engine_ = engine(engine::cpu, 0); + + void Setup(const MklConvBwdInputParams& convBwdInputDims) { + // create memory descriptors for convolution data w/ no specified format + context_.diff_src_md.reset(new memory::desc( + {convBwdInputDims.diff_src_dims}, + MklDnnType(), memory::format::any)); + context_.filter_md.reset(new memory::desc( + {convBwdInputDims.filter_dims}, + MklDnnType(), memory::format::any)); + context_.diff_dst_md.reset(new memory::desc( + {convBwdInputDims.diff_dst_dims}, + MklDnnType(), memory::format::any)); + + // 
create convolution primitives + context_.bwd_input_desc.reset(new convolution_backward_data::desc( + convolution_direct, *context_.diff_src_md, *context_.filter_md, + *context_.diff_dst_md, convBwdInputDims.strides, + convBwdInputDims.dilations, convBwdInputDims.padding_left, + convBwdInputDims.padding_right, convBwdInputDims.padding)); + + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.diff_src_md, *context_.filter_md, + *context_.diff_dst_md, convBwdInputDims.strides, + convBwdInputDims.dilations, convBwdInputDims.padding_left, + convBwdInputDims.padding_right, convBwdInputDims.padding)); + + context_.fwd_pd.reset(new convolution_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); + + // create backward conv prim desc + context_.bwd_input_pd.reset( + new convolution_backward_data::primitive_desc( + *context_.bwd_input_desc, cpu_engine_, *context_.fwd_pd)); + + // create memory primitive based on dummy data + context_.diff_src_mem.reset(new memory( + context_.bwd_input_pd.get()->diff_src_primitive_desc(), DummyData)); + context_.filter_mem.reset(new memory( + context_.bwd_input_pd.get()->weights_primitive_desc(), DummyData)); + context_.diff_dst_mem.reset(new memory( + context_.bwd_input_pd.get()->diff_dst_primitive_desc(), DummyData)); + + // store the expected memory format + context_.filter_fmt = static_cast( + context_.bwd_input_pd.get()->weights_primitive_desc().desc().data.format); + context_.diff_dst_fmt = static_cast( + context_.bwd_input_pd.get()->diff_dst_primitive_desc().desc().data.format); + + // create convolution primitive and add it to net + context_.conv_bwd_input.reset(new convolution_backward_data( + *context_.bwd_input_pd, *context_.diff_dst_mem, + *context_.filter_mem, *context_.diff_src_mem)); + + context_.bwd_input_primitives.push_back(*context_.conv_bwd_input); + return; + } +}; + +template +class MklConv2DBwdInputPrimitiveFactory : public MklPrimitiveFactory { + private: 
+ MklConv2DBwdInputPrimitiveFactory() {} + ~MklConv2DBwdInputPrimitiveFactory() {} + + public: + static MklConv2DBwdInputPrimitive* Get( + const MklConvBwdInputParams& convBwdInputDims) { + MklConv2DBwdInputPrimitive* conv2d_bwd_input = nullptr; + + // look into the pool for reusable primitive + conv2d_bwd_input = dynamic_cast*> ( + MklConv2DBwdInputPrimitiveFactory::GetInstance().GetConv2dBwdInput( + convBwdInputDims)); + + if (conv2d_bwd_input == nullptr) { + conv2d_bwd_input = new MklConv2DBwdInputPrimitive( + convBwdInputDims); + MklConv2DBwdInputPrimitiveFactory::GetInstance().SetConv2dBwdInput( + convBwdInputDims, conv2d_bwd_input); + } + return conv2d_bwd_input; + } + + private: + static MklConv2DBwdInputPrimitiveFactory& GetInstance() { + static MklConv2DBwdInputPrimitiveFactory instance_; + return instance_; + } + + static std::string CreateKey( + const MklConvBwdInputParams& convBwdInputDims) { + std::string prefix = "conv2d_bwd_input"; + FactoryKeyCreator key_creator; + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(convBwdInputDims.diff_src_dims); + key_creator.AddAsKey(convBwdInputDims.filter_dims); + key_creator.AddAsKey(convBwdInputDims.diff_dst_dims); + key_creator.AddAsKey(convBwdInputDims.strides); + key_creator.AddAsKey(convBwdInputDims.dilations); + key_creator.AddAsKey(convBwdInputDims.padding_left); + key_creator.AddAsKey(convBwdInputDims.padding_right); + return key_creator.GetKey(); + } + + MklPrimitive* GetConv2dBwdInput( + const MklConvBwdInputParams& convBwdInputDims) { + std::string key = CreateKey(convBwdInputDims); + return this->GetOp(key); + } + + void SetConv2dBwdInput( + const MklConvBwdInputParams& convBwdInputDims, MklPrimitive *op) { + std::string key = CreateKey(convBwdInputDims); + this->SetOp(key, op); + } +}; + +#endif + #ifdef INTEL_MKL_ML template @@ -363,13 +598,173 @@ class MklConv2DCustomBackpropInputOp : public MklConv2DBackpropCommonOp { public: explicit MklConv2DCustomBackpropInputOp(OpKernelConstruction* 
context) - : MklConv2DBackpropCommonOp(context) {} + : MklConv2DBackpropCommonOp(context) { + } + ~MklConv2DCustomBackpropInputOp() {} + void Compute(OpKernelContext* context) { + try { + MklDnnData filter(&cpu_engine); + MklDnnData diff_dst(&cpu_engine); + + // Input tensors + const int kInputIdx = 0, kFilterIdx = 1, kOutbpropIdx = 2; + const Tensor& src_tensor = MklGetInput(context, kInputIdx); + const Tensor& filter_tensor = MklGetInput(context, kFilterIdx); + const Tensor& diff_dst_tensor = MklGetInput(context, kOutbpropIdx); + + MklDnnShape src_mkl_shape, filter_mkl_shape, diff_dst_mkl_shape; + GetMklShape(context, kInputIdx, &src_mkl_shape); + GetMklShape(context, kFilterIdx, &filter_mkl_shape); + GetMklShape(context, kOutbpropIdx, &diff_dst_mkl_shape); + // Allow operator-specific sanity checking of shapes. + ValidateMklShapes(src_mkl_shape, filter_mkl_shape, + diff_dst_mkl_shape); + + // Allow operator-specific generation of shapes. + // E.g., Conv2DBackpropFilter gets filter as filter_sizes. It is a + // tensor containing shape of filter. So filter.shape() is not + // a correct way to get filter shape. These operator-specific calls + // allow this class to handle this case. + TensorShape src_tf_shape = MakeInputTfShape(context, src_tensor); + TensorShape filter_tf_shape = MakeFilterTfShape(context, filter_tensor); + TensorShape diff_dst_tf_shape = GetTfShape(context, kOutbpropIdx); + + // Corner cases: output with 0 elements and 0 batch size. 
+ Tensor* diff_src_tensor = nullptr; + if (src_tf_shape.num_elements() == 0 || + filter_tf_shape.num_elements() == 0 || + diff_dst_tf_shape.num_elements() == 0) { + MklDnnShape diff_src_mkl_shape; + diff_src_mkl_shape.SetMklTensor(false); + TensorShape diff_src_tf_shape = GetOutputTfShape( + src_tf_shape, filter_tf_shape, diff_dst_tf_shape); + const int kOutputIdx = 0; + AllocateOutputSetMklShape(context, kOutputIdx, &diff_src_tensor, + diff_src_tf_shape, diff_src_mkl_shape); + CHECK_NOTNULL(diff_src_tensor); + + // if output tensor has more than 0 elements, we need to 0 them out. + auto diff_src_data = diff_src_tensor->flat().data(); + for (size_t i = 0; i < diff_src_tf_shape.num_elements(); ++i) { + diff_src_data[i] = 0; + } + return; + } + // By default, all dims are in MKL order. Only dims in TF order + // are those with postfix tf_order. + memory::dims diff_dst_dims, fwd_src_dims, fwd_filter_dims; + memory::dims padding_left, padding_right, dilations, strides; + memory::dims fwd_output_dims, fwd_output_dims_tf_order; + + // Get forward convolution parameters. + MklDnnConvUtil conv_utl(context, this->strides_, this->padding_, + this->data_format_, this->dilations_); + conv_utl.GetConvFwdSizesInMklOrder( + src_tf_shape, filter_tf_shape, &fwd_src_dims, &fwd_filter_dims, + &strides, &dilations, &fwd_output_dims_tf_order, &fwd_output_dims, + &padding_left, &padding_right); + if (!context->status().ok()) return; + + // Create Convolution forward descriptor since Convolution backward + // API needs it. For that, we first need to create input, filter + // and output memory descriptors. + auto tf_fmt = TFDataFormatToMklDnnDataFormat(this->data_format_); + + // If filter is in MKL layout, then simply grab filter layout; + // otherwise, construct filter in TF layout. + // For TF layout, filter is in HWIO format. + auto fwd_filter_md = filter_mkl_shape.IsMklTensor() + ? 
filter_mkl_shape.GetMklLayout() + : memory::desc(fwd_filter_dims, MklDnnType(), + memory::format::hwio); + + conv_utl.GetInputSizeInMklOrder(diff_dst_tf_shape, &diff_dst_dims); + if (!context->status().ok()) return; + auto diff_dst_md = diff_dst_mkl_shape.IsMklTensor() + ? diff_dst_mkl_shape.GetMklLayout() + : memory::desc(diff_dst_dims, + MklDnnType(), tf_fmt); + + dilations[kDilationH] -= 1; + dilations[kDilationW] -= 1; + + MklConv2DBwdInputPrimitive *conv2d_bwd_input = nullptr; + conv_utl.GetInputSizeInMklOrder(diff_dst_tf_shape, &diff_dst_dims); + MklConvBwdInputParams convBwdInputDims(fwd_src_dims, fwd_filter_dims, + diff_dst_dims, strides, dilations, padding_left, padding_right, + TFPaddingToMklDnnPadding(this->padding_)); + conv2d_bwd_input = MklConv2DBwdInputPrimitiveFactory::Get( + convBwdInputDims); + auto bwd_input_pd = conv2d_bwd_input->GetPrimitiveDesc(); + + // allocate output tensor + auto diff_src_pd = bwd_input_pd->diff_src_primitive_desc(); + auto bwd_diff_src_dims = GetOutputDims(fwd_src_dims, fwd_filter_dims); + auto bwd_diff_src_format = GetOutputFormat(tf_fmt); + MklDnnShape diff_src_mkl_shape; + diff_src_mkl_shape.SetMklTensor(true); + diff_src_mkl_shape.SetMklLayout(&diff_src_pd); + diff_src_mkl_shape.SetElemType(MklDnnType()); + diff_src_mkl_shape.SetTfLayout(bwd_diff_src_dims.size(), + bwd_diff_src_dims, bwd_diff_src_format); + TensorShape diff_src_tf_shape; + diff_src_tf_shape.AddDim(diff_src_pd.get_size() / sizeof(T)); + AllocateOutputSetMklShape(context, 0, &diff_src_tensor, + diff_src_tf_shape, diff_src_mkl_shape); + + T *diff_src_data = static_cast(const_cast( + diff_src_tensor->flat().data())); + + // check if filter and diff_dst need reorder + std::vector net; + T* filter_data = nullptr; + if (fwd_filter_md.data.format != + conv2d_bwd_input->GetFilterMemoryFormat()) { + filter.SetUsrMem(fwd_filter_md, &filter_tensor); + filter.CheckReorderToOpMem( + bwd_input_pd->weights_primitive_desc(), + &net); + filter_data = 
static_cast(filter.GetOpMem().get_data_handle()); + } else { + filter_data = static_cast(const_cast( + filter_tensor.flat().data())); + } + + T* diff_dst_data = nullptr; + if (diff_dst_md.data.format != + conv2d_bwd_input->GetDiffDstMemoryFormat()) { + diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); + diff_dst.CheckReorderToOpMem( + bwd_input_pd->diff_dst_primitive_desc(), &net); + diff_dst_data = static_cast( + diff_dst.GetOpMem().get_data_handle()); + } else { + diff_dst_data = static_cast(const_cast( + diff_dst_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); + + // execute convolution input bwd + conv2d_bwd_input->Execute(diff_src_data, filter_data, diff_dst_data); + } catch (mkldnn::error& e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + ", in file " + + string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); + } + } + private: - const int kInputIndex_Filter = 1, kInputIndex_InputSizes = 0, - kInputIndex_OutBackProp = 2; + const int kInputIndex_Filter = 1, kInputIndex_InputSizes = 0; const int kDilationH = 0, kDilationW = 1; + engine cpu_engine = engine(engine::cpu, 0); + + // Validate input shapes. + // Function asserts that input shapes are valid. void ValidateMklShapes(const MklDnnShape& input_mkl_shape, const MklDnnShape& filter_mkl_shape, const MklDnnShape& obp_mkl_shape) { @@ -380,8 +775,7 @@ class MklConv2DCustomBackpropInputOp << "Conv2DBackpropInput: input should not be in MKL Layout"; } - size_t GetInputTensorIndexWithSizes() { return kInputIndex_InputSizes; } - + // Get TensorFlow shape of input tensor. TensorShape MakeInputTfShape(OpKernelContext* context, const Tensor& input_tensor) { TensorShape input_tf_shape; @@ -393,72 +787,32 @@ class MklConv2DCustomBackpropInputOp return input_tf_shape; } + // Get TensorFlow shape of filter tensor. 
TensorShape MakeFilterTfShape(OpKernelContext* context, const Tensor& filter_tensor) { return GetTfShape(context, kInputIndex_Filter); } + // Get the Tensorflow shape of Output (diff_src), + // which is same as shape of Conv2D 'input'. TensorShape GetOutputTfShape(const TensorShape& input_shape, const TensorShape& filter_shape, const TensorShape& outbprop_shape) { - // Output Shape of Conv2DBackpropInput is same as shape of Conv2D 'input'. return input_shape; } + // Get the Tensorflow shape of Output (diff_src), + // which is same as shape of Conv2D 'input'. const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims, const memory::dims& fwd_filter_dims) { - // Output Shape of Conv2DBackpropInput is same as shape of Conv2D 'input'. return fwd_input_dims; } + // Output layout is Tensorflow's layout in data format order. memory::format GetOutputFormat(const memory::format data_format) { - // Output layout is Tensorflow's layout in data format order. return data_format; } - void CreatePrimitive(OpKernelContext* context, const engine& cpu_engine, - const convolution_forward::primitive_desc& conv_fwd_pd, - MklDnnData* input, MklDnnData* filter, - MklDnnData* outbackprop, MklDnnData* output, - Tensor** output_tensor, - const memory::dims& strides, - const memory::dims& dilations, - const memory::dims& padding_l, - const memory::dims& padding_r, padding_kind padding, - const memory::dims& bwd_output_dims, - memory::format bwd_output_format) { - CHECK_NOTNULL(context); - CHECK_NOTNULL(input); - CHECK_NOTNULL(filter); - CHECK_NOTNULL(outbackprop); - CHECK_NOTNULL(output); - CHECK_NOTNULL(output_tensor); - - // Create convolution backward data primitive. - // Use dilated convolution in case dilate rates are greater than zero. - auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? 
- convolution_backward_data::desc(convolution_direct, - output->GetOpMemDesc(), filter->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, - dilations, padding_l, padding_r, padding): - convolution_backward_data::desc(convolution_direct, - output->GetOpMemDesc(), filter->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), - strides, padding_l, padding_r, padding); - - auto bwd_pd = convolution_backward_data::primitive_desc( - bwd_desc, cpu_engine, conv_fwd_pd); - - // Allocate output tensor in TensorFlow and MKL layout. - AllocateOutputTensor(context, bwd_pd, bwd_output_dims, bwd_output_format, - output_tensor); - CHECK_NOTNULL(*output_tensor); - // Set buffer handle using allocated output tensor. - output->SetUsrMemDataHandle(*output_tensor); - - PrepareAndExecutePrimitive(bwd_pd, filter, outbackprop, output); - } - // Allocate output tensor. void AllocateOutputTensor( OpKernelContext* context, @@ -485,22 +839,6 @@ class MklConv2DCustomBackpropInputOp AllocateOutputSetMklShape(context, 0, output_tensor, output_tf_shape, output_mkl_shape); } - - // Prepare and execute net - checks for input and output reorders. - void PrepareAndExecutePrimitive( - const convolution_backward_data::primitive_desc& conv_pd, - MklDnnData* filter, MklDnnData* obp, MklDnnData* output) { - // Create reorders between user layout and MKL layout if it is needed and - // add it to the net before convolution. 
- std::vector net; - filter->CheckReorderToOpMem(conv_pd.weights_primitive_desc(), &net); - obp->CheckReorderToOpMem(conv_pd.diff_dst_primitive_desc(), &net); - - net.push_back(convolution_backward_data( - conv_pd, obp->GetOpMem(), filter->GetOpMem(), output->GetOpMem())); - - stream(stream::kind::eager).submit(net).wait(); - } }; #endif // INTEL_MKL_ML diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index f2b14f1278..c032add82e 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -59,7 +59,8 @@ namespace tensorflow { #ifndef INTEL_MKL_ML -struct ConvFwdDimensions { +// This structure aggregates multiple inputs to Conv2DFwd* methods. +struct MklConvFwdParams { memory::dims src_dims; memory::dims filter_dims; memory::dims bias_dims; @@ -69,7 +70,7 @@ struct ConvFwdDimensions { memory::dims padding_left; memory::dims padding_right; - ConvFwdDimensions(memory::dims src_dims, + MklConvFwdParams(memory::dims src_dims, memory::dims filter_dims, memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, @@ -82,35 +83,40 @@ struct ConvFwdDimensions { }; template -class Conv2DFwd : public DnnOp { +class MklConv2DFwdPrimitive: public MklPrimitive { public: - explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) { - fwd_stream_.reset(new stream(stream::kind::eager)); + explicit MklConv2DFwdPrimitive(const MklConvFwdParams& convFwdDims) { + context_.fwd_stream.reset(new stream(stream::kind::eager)); // create conv primitive - if (conv_fwd_ == nullptr) { + if (context_.conv_fwd == nullptr) { Setup(convFwdDims); } } - ~Conv2DFwd() {} + ~MklConv2DFwdPrimitive() {} // Convolution forward execute with bias // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // bias_data: input data buffer of bias // dst_data: output data buffer of dst - void Execute(T* src_data, T* filter_data, T* 
bias_data, T* dst_data) { - src_mem_->set_data_handle(static_cast(src_data)); - filter_mem_->set_data_handle(static_cast(filter_data)); - bias_mem_->set_data_handle(static_cast(bias_data)); - dst_mem_->set_data_handle(static_cast(dst_data)); - fwd_stream_->submit(fwd_primitives_); + void Execute(const T* src_data, const T* filter_data, + const T* bias_data, const T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + context_.bias_mem->set_data_handle( + static_cast(const_cast(bias_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + context_.fwd_stream->submit(context_.fwd_primitives); // after exec, set data handle back - src_mem_->set_data_handle(DummyData); - filter_mem_->set_data_handle(DummyData); - bias_mem_->set_data_handle(DummyData); - dst_mem_->set_data_handle(DummyData); + context_.src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + context_.bias_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); return; } @@ -119,139 +125,174 @@ class Conv2DFwd : public DnnOp { // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // dst_data: output data buffer of dst - void Execute(T* src_data, T* filter_data, T* dst_data) { - src_mem_->set_data_handle(static_cast(src_data)); - filter_mem_->set_data_handle(static_cast(filter_data)); - dst_mem_->set_data_handle(static_cast(dst_data)); - fwd_stream_->submit(fwd_primitives_); - - // after exec, set data handle back - src_mem_->set_data_handle(DummyData); - filter_mem_->set_data_handle(DummyData); - dst_mem_->set_data_handle(DummyData); + void Execute(const T* src_data, const T* filter_data, + const T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + 
context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + context_.fwd_stream->submit(context_.fwd_primitives); + + // after execution, set data handle back + context_.src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); return; } - // expected memory format for this primitive instance - memory::format src_fmt_; - memory::format filter_fmt_; + memory::format GetSrcMemoryFormat() const { + return context_.src_fmt; + } + + memory::format GetFilterMemoryFormat() const { + return context_.filter_fmt; + } - // convolution primitive - std::shared_ptr fwd_pd_; - std::shared_ptr conv_fwd_; + std::shared_ptr + GetPrimitiveDesc() const { + return context_.fwd_pd; + } private: - void Setup(const ConvFwdDimensions& convFwdDims) { + // Primitive reuse context for Conv2D Fwd op + struct ConvFwdContext { + // expected memory format for this primitive instance + memory::format src_fmt; + memory::format filter_fmt; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr filter_mem; + std::shared_ptr bias_mem; + std::shared_ptr dst_mem; + + // desc & prmitive desc + std::shared_ptr fwd_desc; + + // memory desc + std::shared_ptr src_md; + std::shared_ptr filter_md; + std::shared_ptr bias_md; + std::shared_ptr dst_md; + + // convolution primitive + std::shared_ptr fwd_pd; + std::shared_ptr conv_fwd; + + std::shared_ptr fwd_stream; + std::vector fwd_primitives; + + ConvFwdContext() : + src_fmt(memory::format::any), filter_fmt(memory::format::any), + src_mem(nullptr), filter_mem(nullptr), bias_mem(nullptr), + dst_mem(nullptr), fwd_desc(nullptr), + src_md(nullptr), filter_md(nullptr), bias_md(nullptr), + fwd_pd(nullptr), conv_fwd(nullptr), fwd_stream(nullptr) { + } + } context_; + + engine cpu_engine_ = engine(engine::cpu, 0); + + void Setup(const MklConvFwdParams& convFwdDims) { // create memory descriptors for convolution data w/ no specified format - src_md_.reset(new 
memory::desc({convFwdDims.src_dims}, + context_.src_md.reset(new memory::desc({convFwdDims.src_dims}, MklDnnType(), memory::format::any)); - filter_md_.reset(new memory::desc({convFwdDims.filter_dims}, + context_.filter_md.reset(new memory::desc({convFwdDims.filter_dims}, MklDnnType(), memory::format::any)); - dst_md_.reset(new memory::desc({convFwdDims.dst_dims}, + context_.dst_md.reset(new memory::desc({convFwdDims.dst_dims}, MklDnnType(), memory::format::any)); if (!convFwdDims.bias_dims.empty()) - bias_md_.reset(new memory::desc({convFwdDims.bias_dims}, + context_.bias_md.reset(new memory::desc({convFwdDims.bias_dims}, MklDnnType(), memory::format::any)); // create a convolution if (!convFwdDims.bias_dims.empty()) { - fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_, + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.src_md, *context_.filter_md, + *context_.bias_md, *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, convFwdDims.padding_right, padding_kind::zero)); } else { - fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *src_md_, *filter_md_, *dst_md_, - convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, - convFwdDims.padding_right, padding_kind::zero)); + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.src_md, *context_.filter_md, + *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, + convFwdDims.padding_left, convFwdDims.padding_right, + padding_kind::zero)); } - fwd_pd_.reset(new convolution_forward::primitive_desc( - *fwd_desc_, cpu_engine_)); + context_.fwd_pd.reset(new convolution_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); // store the expected memory format - src_fmt_ = static_cast( - 
fwd_pd_.get()->src_primitive_desc().desc().data.format); + context_.src_fmt = static_cast( + context_.fwd_pd.get()->src_primitive_desc().desc().data.format); - filter_fmt_ = static_cast( - fwd_pd_.get()->weights_primitive_desc().desc().data.format); + context_.filter_fmt = static_cast( + context_.fwd_pd.get()->weights_primitive_desc().desc().data.format); // create memory primitive based on dummy data - src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData)); - filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), - DummyData)); - dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData)); + context_.src_mem.reset(new memory( + context_.fwd_pd.get()->src_primitive_desc(), DummyData)); + context_.filter_mem.reset(new memory( + context_.fwd_pd.get()->weights_primitive_desc(), DummyData)); + context_.dst_mem.reset(new memory( + context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); // create convolution primitive and add it to net if (!convFwdDims.bias_dims.empty()) { - bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType(), - memory::format::x}, cpu_engine_}, DummyData)); - conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, - *filter_mem_, *bias_mem_, *dst_mem_)); + context_.bias_mem.reset(new memory({{{convFwdDims.bias_dims}, + MklDnnType(), memory::format::x}, cpu_engine_}, DummyData)); + context_.conv_fwd.reset(new convolution_forward( + *context_.fwd_pd, *context_.src_mem, *context_.filter_mem, + *context_.bias_mem, *context_.dst_mem)); } else { - conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, - *filter_mem_, *dst_mem_)); + context_.conv_fwd.reset(new convolution_forward( + *context_.fwd_pd, *context_.src_mem, + *context_.filter_mem, *context_.dst_mem)); } - fwd_primitives_.push_back(*conv_fwd_); + context_.fwd_primitives.push_back(*context_.conv_fwd); return; } - - // MKLDNN memory - std::shared_ptr src_mem_; - std::shared_ptr filter_mem_; - std::shared_ptr bias_mem_; - 
std::shared_ptr dst_mem_; - - std::shared_ptr fwd_stream_; - std::vector fwd_primitives_; - - // desc & prmitive desc - std::shared_ptr fwd_desc_; - - // memory desc - std::shared_ptr src_md_; - std::shared_ptr filter_md_; - std::shared_ptr bias_md_; - std::shared_ptr dst_md_; - - engine cpu_engine_ = engine(engine::cpu, 0); }; template -class Conv2DFwdFactory : public DnnOpFactory { +class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { public: - static Conv2DFwd* Get(const ConvFwdDimensions& convFwdDims) { - Conv2DFwd* conv2d_fwd = nullptr; + static MklConv2DFwdPrimitive* Get(const MklConvFwdParams& convFwdDims) { + MklConv2DFwdPrimitive* conv2d_fwd = nullptr; // try to find a suitable one in pool - conv2d_fwd = dynamic_cast*> ( - Conv2DFwdFactory::GetInstance().GetConv2DFwd(convFwdDims)); + conv2d_fwd = dynamic_cast*> ( + MklConv2DFwdPrimitiveFactory::GetInstance().GetConv2DFwd( + convFwdDims)); if (conv2d_fwd == nullptr) { - conv2d_fwd = new Conv2DFwd(convFwdDims); - Conv2DFwdFactory::GetInstance().SetConv2DFwd( + conv2d_fwd = new MklConv2DFwdPrimitive(convFwdDims); + MklConv2DFwdPrimitiveFactory::GetInstance().SetConv2DFwd( convFwdDims, conv2d_fwd); } return conv2d_fwd; } private: - Conv2DFwdFactory() {} - ~Conv2DFwdFactory() {} + MklConv2DFwdPrimitiveFactory() {} + ~MklConv2DFwdPrimitiveFactory() {} static const int kDilationH = 0, kDilationW = 1; - static Conv2DFwdFactory& GetInstance() { - static Conv2DFwdFactory instance_; + static MklConv2DFwdPrimitiveFactory& GetInstance() { + static MklConv2DFwdPrimitiveFactory instance_; return instance_; } - static std::string CreateKey(const ConvFwdDimensions& convFwdDims) { + static std::string CreateKey(const MklConvFwdParams& convFwdDims) { std::string prefix = "conv2d_fwd_"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -266,12 +307,12 @@ class Conv2DFwdFactory : public DnnOpFactory { return key_creator.GetKey(); } - DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) { + 
MklPrimitive* GetConv2DFwd(const MklConvFwdParams& convFwdDims) { std::string key = CreateKey(convFwdDims); return this->GetOp(key); } - void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) { + void SetConv2DFwd(const MklConvFwdParams& convFwdDims, MklPrimitive *op) { std::string key = CreateKey(convFwdDims); this->SetOp(key, op); } @@ -762,7 +803,6 @@ class MklConv2DOp : public OpKernel { MklDnnData src(&cpu_engine); MklDnnData filter(&cpu_engine); - MklDnnData dst(&cpu_engine); // output memory::dims src_dims, filter_dims, padding_left, padding_right, dilations, strides; @@ -812,7 +852,6 @@ class MklConv2DOp : public OpKernel { auto src_md = src_mkl_shape.IsMklTensor() ? src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), tf_fmt); - src.SetUsrMem(src_md, &src_tensor); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). @@ -820,29 +859,28 @@ class MklConv2DOp : public OpKernel { ? filter_mkl_shape.GetMklLayout() : memory::desc(filter_dims, MklDnnType(), memory::format::hwio); - filter.SetUsrMem(filter_md, &filter_tensor); // MKLDNN dilation starts from 0. 
dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; // get a conv2d fwd from primitive pool - Conv2DFwd *conv2d_fwd = nullptr; + MklConv2DFwdPrimitive *conv2d_fwd = nullptr; if (biasEnabled) { memory::dims bias_dims = {}; conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); - ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims, + MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); + conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); } else { - ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS, + MklConvFwdParams convFwdDims(src_dims, filter_dims, NONE_DIMS, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); + conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); } // allocate output tensors output_tensor and filter_out_tensor std::shared_ptr - conv_fwd_pd = conv2d_fwd->fwd_pd_; + conv_fwd_pd = conv2d_fwd->GetPrimitiveDesc(); AllocateOutputTensor(context, *conv_fwd_pd, dst_dims_mkl_order, tf_fmt, &dst_tensor); Tensor* filter_out_tensor = nullptr; @@ -854,20 +892,30 @@ class MklConv2DOp : public OpKernel { // check whether src/filter need reorder std::vector net; - if (src_md.data.format != conv2d_fwd->src_fmt_) - src.CheckReorderToOpMem( - conv_fwd_pd.get()->src_primitive_desc(), &net); - - if (filter_md.data.format != conv2d_fwd->filter_fmt_) - filter.CheckReorderToOpMem( - conv_fwd_pd.get()->weights_primitive_desc(), - filter.GetTensorBuffer(filter_out_tensor), &net); + T *src_data = nullptr; + if (src_md.data.format != conv2d_fwd->GetSrcMemoryFormat()) { + src.SetUsrMem(src_md, &src_tensor); + src.CheckReorderToOpMem( + conv_fwd_pd.get()->src_primitive_desc(), &net); + src_data = static_cast(src.GetOpMem().get_data_handle()); + } else { + src_data = static_cast(const_cast( + src_tensor.flat().data())); + } + T 
*filter_data = nullptr; + if (filter_md.data.format != conv2d_fwd->GetFilterMemoryFormat()) { + filter.SetUsrMem(filter_md, &filter_tensor); + filter.CheckReorderToOpMem( + conv_fwd_pd.get()->weights_primitive_desc(), + filter.GetTensorBuffer(filter_out_tensor), &net); + filter_data = static_cast(filter.GetOpMem().get_data_handle()); + } else { + filter_data = static_cast(const_cast( + filter_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); - T* src_data = static_cast( - src.GetOpMem().get_data_handle()); - T* filter_data = static_cast( - filter.GetOpMem().get_data_handle()); // execute convolution if (biasEnabled) { diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index 8333a09316..5e1a5001dc 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -19,6 +19,7 @@ limitations under the License. #include #include #include +#include #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" @@ -349,6 +350,7 @@ class MklDnnConvUtil { } }; + ///////////////////////////////////////////////////////////////////// /// Common class that implements Conv2DBackpropFilter and Input ///////////////////////////////////////////////////////////////////// @@ -388,227 +390,17 @@ class MklConv2DBackpropCommonOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); } - void Compute(OpKernelContext* context) override { - try { - auto cpu_engine = engine(engine::cpu, 0); - - // Prepare common tensors for Conv2DBackpropInput and - // Conv2DBackpropFilter. 
- MklDnnData input(&cpu_engine); - MklDnnData filter(&cpu_engine); - MklDnnData outbackprop(&cpu_engine); - MklDnnData output(&cpu_engine); - - // Input tensors - const int kInputIdx = 0, kFilterIdx = 1, kOutbpropIdx = 2; - const Tensor& input_tensor = MklGetInput(context, kInputIdx); - const Tensor& filter_tensor = MklGetInput(context, kFilterIdx); - const Tensor& outbprop_tensor = MklGetInput(context, kOutbpropIdx); - - MklDnnShape input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape; - GetMklShape(context, kInputIdx, &input_mkl_shape); - GetMklShape(context, kFilterIdx, &filter_mkl_shape); - GetMklShape(context, kOutbpropIdx, &outbprop_mkl_shape); - // Allow operator-specific sanity checking of shapes. - ValidateMklShapes(input_mkl_shape, filter_mkl_shape, outbprop_mkl_shape); - - // Allow operator-specific generation of shapes. - // E.g., Conv2DBackpropFilter gets filter as filter_sizes. It is a - // tensor containing shape of filter. So filter.shape() is not - // a correct way to get filter shape. These operator-specific calls - // allow this class to handle this case. - TensorShape input_tf_shape = MakeInputTfShape(context, input_tensor); - TensorShape filter_tf_shape = MakeFilterTfShape(context, filter_tensor); - TensorShape outbprop_tf_shape = GetTfShape(context, kOutbpropIdx); - - // Corner cases: output with 0 elements and 0 batch size. - Tensor* output_tensor = nullptr; - if (input_tf_shape.num_elements() == 0 || - filter_tf_shape.num_elements() == 0 || - outbprop_tf_shape.num_elements() == 0) { - MklDnnShape output_mkl_shape; - output_mkl_shape.SetMklTensor(false); - TensorShape output_tf_shape = GetOutputTfShape( - input_tf_shape, filter_tf_shape, outbprop_tf_shape); - const int kOutputIdx = 0; - AllocateOutputSetMklShape(context, kOutputIdx, &output_tensor, - output_tf_shape, output_mkl_shape); - CHECK_NOTNULL(output_tensor); - - // if output tensor has more than 0 elements, we need to 0 them out. 
- for (size_t i = 0; i < output_tf_shape.num_elements(); ++i) { - output_tensor->flat().data()[i] = 0; - } - - return; - } - - // By default, all dims are in MKL order. Only dims in TF order - // are those with prefix tf_order. - memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims; - memory::dims padding_l, padding_r, dilations, strides, fwd_output_dims; - memory::dims fwd_output_dims_tf_order; - - // Get forward convolution parameters. - MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, - dilations_); - conv_utl.GetConvFwdSizesInMklOrder( - input_tf_shape, filter_tf_shape, &fwd_input_dims, &fwd_filter_dims, - &strides, &dilations, &fwd_output_dims_tf_order, &fwd_output_dims, - &padding_l, &padding_r); - if (!context->status().ok()) return; - - // Create Convolution forward descriptor since Convolution backward - // API needs it. For that, we first need to create input, filter - // and output memory descriptors. - auto tf_fmt = TFDataFormatToMklDnnDataFormat(data_format_); - // If input is in MKL layout, then simply grab input layout; otherwise, - // construct input TF layout. For TF layout, although input shape - // required is in MKL-DNN order, the layout is Tensorflow's layout - // (NHWC or NCHW depending on data format). - auto fwd_input_md = - input_mkl_shape.IsMklTensor() - ? input_mkl_shape.GetMklLayout() - : memory::desc(fwd_input_dims, MklDnnType(), tf_fmt); - // If filter is in MKL layout, then simply grab filter layout; otherwise - // construct filter in TF layout. For TF layout, filter is in HWIO format. - auto fwd_filter_md = filter_mkl_shape.IsMklTensor() - ? filter_mkl_shape.GetMklLayout() - : memory::desc(fwd_filter_dims, MklDnnType(), - memory::format::hwio); - // Tensorflow Output of Conv2D is in data_format order. 
- auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), tf_fmt); - - const int kDilationH = 0, kDilationW = 1; - dilations[kDilationH] -= 1; - dilations[kDilationW] -= 1; - auto fwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0)? - convolution_forward::desc(prop_kind::forward, - convolution_direct, fwd_input_md, - fwd_filter_md, fwd_out_md, - strides, dilations, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)) : - convolution_forward::desc(prop_kind::forward, - convolution_direct, fwd_input_md, - fwd_filter_md, fwd_out_md, - strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); - auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); - - // Create memory for user data. Describe how the inputs and outputs of - // Convolution look like. Also specify buffers containing actual input - // and output data. - - // Since this is a common class for both Conv2DBackpropFilter and - // Conv2DBackpropInput, we skip SetUsrMem call for input tensor (for - // Conv2DBackpropInput) and for filter tensor (for - // conv2DBackpropFilter) depending on which tensor is int32 type. - size_t input_with_sizes = GetInputTensorIndexWithSizes(); - if (input_with_sizes != kInputIdx) { - // Shape of Conv2DBackpropFilter's input is same as Conv2D input. - input.SetUsrMem(fwd_input_md, &input_tensor); - } else if (input_with_sizes != kFilterIdx) { - // Shape of Conv2DBackpropInput's filter is same as Conv2D filter. - filter.SetUsrMem(fwd_filter_md, &filter_tensor); - } - - conv_utl.GetInputSizeInMklOrder(outbprop_tf_shape, &outbprop_dims); - if (!context->status().ok()) return; - if (outbprop_mkl_shape.IsMklTensor()) { - // If outbackprop is in Mkl layout, then simply grab it. - auto outbprop_md = outbprop_mkl_shape.GetMklLayout(); - outbackprop.SetUsrMem(outbprop_md, &outbprop_tensor); - } else { - // If outbackprop is in TensorFlow layout, then we need to create memory - // descriptor for it. 
Outbackprop shape is data format order. - outbackprop.SetUsrMem(outbprop_dims, tf_fmt, &outbprop_tensor); - } - - // Operator specific call to get output shape and data_format. - auto bwd_output_dims = GetOutputDims(fwd_input_dims, fwd_filter_dims); - auto bwd_output_format = GetOutputFormat(tf_fmt); - output.SetUsrMem(bwd_output_dims, bwd_output_format); - - // Create memory descriptors for convolution data w/ no specified format. - input.SetOpMemDesc(fwd_input_dims, memory::format::any); - filter.SetOpMemDesc(fwd_filter_dims, memory::format::any); - outbackprop.SetOpMemDesc(outbprop_dims, memory::format::any); - output.SetOpMemDesc(bwd_output_dims, memory::format::any); - - // Operator-specific call to create and execute primitive. - CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter, - &outbackprop, &output, &output_tensor, - strides, dilations, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_), - bwd_output_dims, bwd_output_format); - } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); - } - } - - /// Pure virtual function to allow operator to check for validity of input - /// shapes. Function asserts that input shapes are valid. - virtual void ValidateMklShapes(const MklDnnShape& input_mkl_shape, - const MklDnnShape& filter_mkl_shape, - const MklDnnShape& outbprop_mkl_shape) = 0; - - /// Operator-specific function that returns index of input that is - /// representing input sizes. For Conv2DBackpropFilter it returns 1 since - /// filter for this operator is filter shape. For Conv2DBackpropInput it - /// returns 0 (for input). - virtual size_t GetInputTensorIndexWithSizes() = 0; - - /// Get TensorFlow shape of input tensor. 
- virtual TensorShape MakeInputTfShape(OpKernelContext* context, - const Tensor& input_tensor) = 0; - - /// Get TensorFlow shape of filter tensor. - virtual TensorShape MakeFilterTfShape(OpKernelContext* context, - const Tensor& filter_tensor) = 0; - - /// Get the TensorFlow shape of output tensor. - virtual TensorShape GetOutputTfShape(const TensorShape& input_shape, - const TensorShape& filter_shape, - const TensorShape& outbprop_shape) = 0; - - /// Get shape of output in MKL-DNN order. Computes shape of output from - /// input shape (fwd_input_dims) and filter shape (fwd_filter_dims). - virtual const memory::dims& GetOutputDims( - const memory::dims& fwd_input_dims, - const memory::dims& fwd_filter_dims) = 0; - - /// Get data_format of output in MKL-DNN order. If output data format is - /// same as input data format, then it simply returns value of data_format - /// parameter as it is. - virtual memory::format GetOutputFormat(const memory::format data_format) = 0; - - /// Create and execute the primitive storing output in the output_tensor. - virtual void CreatePrimitive(OpKernelContext* context, - const engine& cpu_engine, - const convolution_forward::primitive_desc& conv_fwd_pd, - MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, - MklDnnData* output, Tensor** output_tensor, const memory::dims& strides, - const memory::dims& dilations, const memory::dims& padding_l, - const memory::dims& padding_r, padding_kind padding, - const memory::dims& bwd_output_dims, - memory::format bwd_output_format) = 0; - - // Get the data_format {NCHW, NHWC} - TensorFormat GetTFDataFormat() { return data_format_; } - - private: + protected: + // data members accessible to derived classes. 
std::vector dilations_; std::vector strides_; Padding padding_; - TensorFormat data_format_; + TensorFormat data_format_; // NCHW or NHWC }; + #endif // INTEL_MKL_ML + ///////////////////////////////////////////////////////////////////// /// Dummy Mkl op that is just used for operators that are intermediate /// output of node fusion in the graph diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 230b4278ca..f19307756f 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1794,11 +1794,11 @@ class MklDnnData { } }; -/// Base class for operations with reuse of DNN primitives +/// Base class for operations with reuse of primitives /// -class DnnOp { +class MklPrimitive { public: - virtual ~DnnOp() {} + virtual ~MklPrimitive() {} // Dummy data. Its size, hard-coded as 256 here, does // not matter since MKL should never operate on this buffer. @@ -1806,33 +1806,33 @@ class DnnOp { }; const mkldnn::memory::dims NONE_DIMS = {}; -// This constant is used to declare dummy buffer (size), for MKL primitives + template -class DnnOpFactory { +class MklPrimitiveFactory { public: - DnnOpFactory() {} - ~DnnOpFactory() {} + MklPrimitiveFactory() {} + ~MklPrimitiveFactory() {} - DnnOp* GetOp(const std::string& key) { - auto stream_iter = DnnOpFactory::GetHashMap().find(key); - if (stream_iter == DnnOpFactory::GetHashMap().end()) { + MklPrimitive* GetOp(const std::string& key) { + auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); + if (stream_iter == MklPrimitiveFactory::GetHashMap().end()) { return nullptr; } else { return stream_iter->second; } } - void SetOp(const std::string& key, DnnOp* op) { - auto stream_iter = DnnOpFactory::GetHashMap().find(key); + void SetOp(const std::string& key, MklPrimitive* op) { + auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); - CHECK(stream_iter == DnnOpFactory::GetHashMap().end()); + CHECK(stream_iter == MklPrimitiveFactory::GetHashMap().end()); - 
DnnOpFactory::GetHashMap()[key] = op; + MklPrimitiveFactory::GetHashMap()[key] = op; } private: - static inline std::unordered_map &GetHashMap() { - static thread_local std::unordered_map map_; + static inline std::unordered_map &GetHashMap() { + static thread_local std::unordered_map map_; return map_; } }; -- GitLab From 28727ba97c1b3d2987b96a51061f735157ebabf6 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Fri, 18 May 2018 11:40:08 -0700 Subject: [PATCH 0017/2038] enhancement with BatchNorm primitive reuse --- tensorflow/core/kernels/mkl_conv_ops.cc | 280 +++--- .../core/kernels/mkl_fused_batch_norm_op.cc | 904 +++++++++++++----- tensorflow/core/util/mkl_util.h | 59 +- 3 files changed, 858 insertions(+), 385 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index f2b14f1278..c032add82e 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -59,7 +59,8 @@ namespace tensorflow { #ifndef INTEL_MKL_ML -struct ConvFwdDimensions { +// This structure aggregates multiple inputs to Conv2DFwd* methods. 
+struct MklConvFwdParams { memory::dims src_dims; memory::dims filter_dims; memory::dims bias_dims; @@ -69,7 +70,7 @@ struct ConvFwdDimensions { memory::dims padding_left; memory::dims padding_right; - ConvFwdDimensions(memory::dims src_dims, + MklConvFwdParams(memory::dims src_dims, memory::dims filter_dims, memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, @@ -82,35 +83,40 @@ struct ConvFwdDimensions { }; template -class Conv2DFwd : public DnnOp { +class MklConv2DFwdPrimitive: public MklPrimitive { public: - explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) { - fwd_stream_.reset(new stream(stream::kind::eager)); + explicit MklConv2DFwdPrimitive(const MklConvFwdParams& convFwdDims) { + context_.fwd_stream.reset(new stream(stream::kind::eager)); // create conv primitive - if (conv_fwd_ == nullptr) { + if (context_.conv_fwd == nullptr) { Setup(convFwdDims); } } - ~Conv2DFwd() {} + ~MklConv2DFwdPrimitive() {} // Convolution forward execute with bias // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // bias_data: input data buffer of bias // dst_data: output data buffer of dst - void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) { - src_mem_->set_data_handle(static_cast(src_data)); - filter_mem_->set_data_handle(static_cast(filter_data)); - bias_mem_->set_data_handle(static_cast(bias_data)); - dst_mem_->set_data_handle(static_cast(dst_data)); - fwd_stream_->submit(fwd_primitives_); + void Execute(const T* src_data, const T* filter_data, + const T* bias_data, const T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + context_.bias_mem->set_data_handle( + static_cast(const_cast(bias_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + 
context_.fwd_stream->submit(context_.fwd_primitives); // after exec, set data handle back - src_mem_->set_data_handle(DummyData); - filter_mem_->set_data_handle(DummyData); - bias_mem_->set_data_handle(DummyData); - dst_mem_->set_data_handle(DummyData); + context_.src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + context_.bias_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); return; } @@ -119,139 +125,174 @@ class Conv2DFwd : public DnnOp { // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // dst_data: output data buffer of dst - void Execute(T* src_data, T* filter_data, T* dst_data) { - src_mem_->set_data_handle(static_cast(src_data)); - filter_mem_->set_data_handle(static_cast(filter_data)); - dst_mem_->set_data_handle(static_cast(dst_data)); - fwd_stream_->submit(fwd_primitives_); - - // after exec, set data handle back - src_mem_->set_data_handle(DummyData); - filter_mem_->set_data_handle(DummyData); - dst_mem_->set_data_handle(DummyData); + void Execute(const T* src_data, const T* filter_data, + const T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + context_.fwd_stream->submit(context_.fwd_primitives); + + // after execution, set data handle back + context_.src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); return; } - // expected memory format for this primitive instance - memory::format src_fmt_; - memory::format filter_fmt_; + memory::format GetSrcMemoryFormat() const { + return context_.src_fmt; + } + + memory::format GetFilterMemoryFormat() const { + return context_.filter_fmt; + } - // convolution primitive - std::shared_ptr fwd_pd_; - std::shared_ptr conv_fwd_; + 
std::shared_ptr + GetPrimitiveDesc() const { + return context_.fwd_pd; + } private: - void Setup(const ConvFwdDimensions& convFwdDims) { + // Primitive reuse context for Conv2D Fwd op + struct ConvFwdContext { + // expected memory format for this primitive instance + memory::format src_fmt; + memory::format filter_fmt; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr filter_mem; + std::shared_ptr bias_mem; + std::shared_ptr dst_mem; + + // desc & prmitive desc + std::shared_ptr fwd_desc; + + // memory desc + std::shared_ptr src_md; + std::shared_ptr filter_md; + std::shared_ptr bias_md; + std::shared_ptr dst_md; + + // convolution primitive + std::shared_ptr fwd_pd; + std::shared_ptr conv_fwd; + + std::shared_ptr fwd_stream; + std::vector fwd_primitives; + + ConvFwdContext() : + src_fmt(memory::format::any), filter_fmt(memory::format::any), + src_mem(nullptr), filter_mem(nullptr), bias_mem(nullptr), + dst_mem(nullptr), fwd_desc(nullptr), + src_md(nullptr), filter_md(nullptr), bias_md(nullptr), + fwd_pd(nullptr), conv_fwd(nullptr), fwd_stream(nullptr) { + } + } context_; + + engine cpu_engine_ = engine(engine::cpu, 0); + + void Setup(const MklConvFwdParams& convFwdDims) { // create memory descriptors for convolution data w/ no specified format - src_md_.reset(new memory::desc({convFwdDims.src_dims}, + context_.src_md.reset(new memory::desc({convFwdDims.src_dims}, MklDnnType(), memory::format::any)); - filter_md_.reset(new memory::desc({convFwdDims.filter_dims}, + context_.filter_md.reset(new memory::desc({convFwdDims.filter_dims}, MklDnnType(), memory::format::any)); - dst_md_.reset(new memory::desc({convFwdDims.dst_dims}, + context_.dst_md.reset(new memory::desc({convFwdDims.dst_dims}, MklDnnType(), memory::format::any)); if (!convFwdDims.bias_dims.empty()) - bias_md_.reset(new memory::desc({convFwdDims.bias_dims}, + context_.bias_md.reset(new memory::desc({convFwdDims.bias_dims}, MklDnnType(), memory::format::any)); // create a convolution if 
(!convFwdDims.bias_dims.empty()) { - fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_, + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.src_md, *context_.filter_md, + *context_.bias_md, *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, convFwdDims.padding_right, padding_kind::zero)); } else { - fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *src_md_, *filter_md_, *dst_md_, - convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, - convFwdDims.padding_right, padding_kind::zero)); + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.src_md, *context_.filter_md, + *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, + convFwdDims.padding_left, convFwdDims.padding_right, + padding_kind::zero)); } - fwd_pd_.reset(new convolution_forward::primitive_desc( - *fwd_desc_, cpu_engine_)); + context_.fwd_pd.reset(new convolution_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); // store the expected memory format - src_fmt_ = static_cast( - fwd_pd_.get()->src_primitive_desc().desc().data.format); + context_.src_fmt = static_cast( + context_.fwd_pd.get()->src_primitive_desc().desc().data.format); - filter_fmt_ = static_cast( - fwd_pd_.get()->weights_primitive_desc().desc().data.format); + context_.filter_fmt = static_cast( + context_.fwd_pd.get()->weights_primitive_desc().desc().data.format); // create memory primitive based on dummy data - src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData)); - filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), - DummyData)); - dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData)); + context_.src_mem.reset(new memory( + context_.fwd_pd.get()->src_primitive_desc(), DummyData)); + 
context_.filter_mem.reset(new memory( + context_.fwd_pd.get()->weights_primitive_desc(), DummyData)); + context_.dst_mem.reset(new memory( + context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); // create convolution primitive and add it to net if (!convFwdDims.bias_dims.empty()) { - bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType(), - memory::format::x}, cpu_engine_}, DummyData)); - conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, - *filter_mem_, *bias_mem_, *dst_mem_)); + context_.bias_mem.reset(new memory({{{convFwdDims.bias_dims}, + MklDnnType(), memory::format::x}, cpu_engine_}, DummyData)); + context_.conv_fwd.reset(new convolution_forward( + *context_.fwd_pd, *context_.src_mem, *context_.filter_mem, + *context_.bias_mem, *context_.dst_mem)); } else { - conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, - *filter_mem_, *dst_mem_)); + context_.conv_fwd.reset(new convolution_forward( + *context_.fwd_pd, *context_.src_mem, + *context_.filter_mem, *context_.dst_mem)); } - fwd_primitives_.push_back(*conv_fwd_); + context_.fwd_primitives.push_back(*context_.conv_fwd); return; } - - // MKLDNN memory - std::shared_ptr src_mem_; - std::shared_ptr filter_mem_; - std::shared_ptr bias_mem_; - std::shared_ptr dst_mem_; - - std::shared_ptr fwd_stream_; - std::vector fwd_primitives_; - - // desc & prmitive desc - std::shared_ptr fwd_desc_; - - // memory desc - std::shared_ptr src_md_; - std::shared_ptr filter_md_; - std::shared_ptr bias_md_; - std::shared_ptr dst_md_; - - engine cpu_engine_ = engine(engine::cpu, 0); }; template -class Conv2DFwdFactory : public DnnOpFactory { +class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { public: - static Conv2DFwd* Get(const ConvFwdDimensions& convFwdDims) { - Conv2DFwd* conv2d_fwd = nullptr; + static MklConv2DFwdPrimitive* Get(const MklConvFwdParams& convFwdDims) { + MklConv2DFwdPrimitive* conv2d_fwd = nullptr; // try to find a suitable one in pool - conv2d_fwd = dynamic_cast*> 
( - Conv2DFwdFactory::GetInstance().GetConv2DFwd(convFwdDims)); + conv2d_fwd = dynamic_cast*> ( + MklConv2DFwdPrimitiveFactory::GetInstance().GetConv2DFwd( + convFwdDims)); if (conv2d_fwd == nullptr) { - conv2d_fwd = new Conv2DFwd(convFwdDims); - Conv2DFwdFactory::GetInstance().SetConv2DFwd( + conv2d_fwd = new MklConv2DFwdPrimitive(convFwdDims); + MklConv2DFwdPrimitiveFactory::GetInstance().SetConv2DFwd( convFwdDims, conv2d_fwd); } return conv2d_fwd; } private: - Conv2DFwdFactory() {} - ~Conv2DFwdFactory() {} + MklConv2DFwdPrimitiveFactory() {} + ~MklConv2DFwdPrimitiveFactory() {} static const int kDilationH = 0, kDilationW = 1; - static Conv2DFwdFactory& GetInstance() { - static Conv2DFwdFactory instance_; + static MklConv2DFwdPrimitiveFactory& GetInstance() { + static MklConv2DFwdPrimitiveFactory instance_; return instance_; } - static std::string CreateKey(const ConvFwdDimensions& convFwdDims) { + static std::string CreateKey(const MklConvFwdParams& convFwdDims) { std::string prefix = "conv2d_fwd_"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -266,12 +307,12 @@ class Conv2DFwdFactory : public DnnOpFactory { return key_creator.GetKey(); } - DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) { + MklPrimitive* GetConv2DFwd(const MklConvFwdParams& convFwdDims) { std::string key = CreateKey(convFwdDims); return this->GetOp(key); } - void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) { + void SetConv2DFwd(const MklConvFwdParams& convFwdDims, MklPrimitive *op) { std::string key = CreateKey(convFwdDims); this->SetOp(key, op); } @@ -762,7 +803,6 @@ class MklConv2DOp : public OpKernel { MklDnnData src(&cpu_engine); MklDnnData filter(&cpu_engine); - MklDnnData dst(&cpu_engine); // output memory::dims src_dims, filter_dims, padding_left, padding_right, dilations, strides; @@ -812,7 +852,6 @@ class MklConv2DOp : public OpKernel { auto src_md = src_mkl_shape.IsMklTensor() ? 
src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), tf_fmt); - src.SetUsrMem(src_md, &src_tensor); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). @@ -820,29 +859,28 @@ class MklConv2DOp : public OpKernel { ? filter_mkl_shape.GetMklLayout() : memory::desc(filter_dims, MklDnnType(), memory::format::hwio); - filter.SetUsrMem(filter_md, &filter_tensor); // MKLDNN dilation starts from 0. dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; // get a conv2d fwd from primitive pool - Conv2DFwd *conv2d_fwd = nullptr; + MklConv2DFwdPrimitive *conv2d_fwd = nullptr; if (biasEnabled) { memory::dims bias_dims = {}; conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); - ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims, + MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); + conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); } else { - ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS, + MklConvFwdParams convFwdDims(src_dims, filter_dims, NONE_DIMS, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); + conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); } // allocate output tensors output_tensor and filter_out_tensor std::shared_ptr - conv_fwd_pd = conv2d_fwd->fwd_pd_; + conv_fwd_pd = conv2d_fwd->GetPrimitiveDesc(); AllocateOutputTensor(context, *conv_fwd_pd, dst_dims_mkl_order, tf_fmt, &dst_tensor); Tensor* filter_out_tensor = nullptr; @@ -854,20 +892,30 @@ class MklConv2DOp : public OpKernel { // check whether src/filter need reorder std::vector net; - if (src_md.data.format != conv2d_fwd->src_fmt_) - src.CheckReorderToOpMem( - conv_fwd_pd.get()->src_primitive_desc(), &net); - - if (filter_md.data.format != conv2d_fwd->filter_fmt_) - 
filter.CheckReorderToOpMem( - conv_fwd_pd.get()->weights_primitive_desc(), - filter.GetTensorBuffer(filter_out_tensor), &net); + T *src_data = nullptr; + if (src_md.data.format != conv2d_fwd->GetSrcMemoryFormat()) { + src.SetUsrMem(src_md, &src_tensor); + src.CheckReorderToOpMem( + conv_fwd_pd.get()->src_primitive_desc(), &net); + src_data = static_cast(src.GetOpMem().get_data_handle()); + } else { + src_data = static_cast(const_cast( + src_tensor.flat().data())); + } + T *filter_data = nullptr; + if (filter_md.data.format != conv2d_fwd->GetFilterMemoryFormat()) { + filter.SetUsrMem(filter_md, &filter_tensor); + filter.CheckReorderToOpMem( + conv_fwd_pd.get()->weights_primitive_desc(), + filter.GetTensorBuffer(filter_out_tensor), &net); + filter_data = static_cast(filter.GetOpMem().get_data_handle()); + } else { + filter_data = static_cast(const_cast( + filter_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); - T* src_data = static_cast( - src.GetOpMem().get_data_handle()); - T* filter_data = static_cast( - filter.GetOpMem().get_data_handle()); // execute convolution if (biasEnabled) { diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 62aafa7930..f4f6f8457d 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -262,6 +262,7 @@ class MklFusedBatchNormOp : public OpKernel { } void MklCreateInputLayout(OpKernelContext* context) { + const Tensor& input = MklGetInput(context, 0); bool input_in_mkl_format = mkl_shape_input_shape.IsMklTensor(); if (input_in_mkl_format) { mkl_lt_input = @@ -544,6 +545,7 @@ class MklFusedBatchNormGradOp : public OpKernel { } void MklCreateInputLayout(OpKernelContext* context) { + const Tensor& input = MklGetInput(context, 0); bool input_in_mkl_format = mkl_shape_input_shape.IsMklTensor(); if (input_in_mkl_format) { mkl_lt_input = @@ -684,6 +686,465 @@ class 
MklFusedBatchNormGradOp : public OpKernel { #ifndef INTEL_MKL_ML +struct MklBatchNormFwdParams { + memory::dims src_dims; + int depth; + float eps; + bool training; + + MklBatchNormFwdParams(const memory::dims &src_dims, + int depth, float eps, bool training) : src_dims(src_dims), + depth(depth), eps(eps), training(training) { + } +}; + +template +class MklFusedBatchNormFwdPrimitive : public MklPrimitive { + public: + explicit MklFusedBatchNormFwdPrimitive( + const MklBatchNormFwdParams& fwdParams) { + context_.fwd_stream.reset( + new mkldnn::stream(mkldnn::stream::kind::eager)); + if (context_.bn_fwd == nullptr) + Setup(fwdParams); + } + + ~MklFusedBatchNormFwdPrimitive() {} + + // BatchNormalization forward execute + // src_data: input data buffer of src + // weights_data: input data buffer of weights + // dst_data: output data buffer of dst + // mean_data: input data buffer of means + // variance_data: input data buffer of variances + void Execute(const T* src_data, const T* weights_data, const T* dst_data, + const T* mean_data, const T* variance_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + + if (context_.flags & use_scale_shift) + context_.weights_mem->set_data_handle( + static_cast(const_cast(weights_data))); + + if ((context_.pkind == prop_kind::forward_training) || + (context_.flags & use_global_stats)) { + context_.mean_mem->set_data_handle( + static_cast(const_cast(mean_data))); + context_.variance_mem->set_data_handle( + static_cast(const_cast(variance_data))); + } + + // execution + context_.fwd_stream->submit(context_.fwd_primitives); + + context_.src_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); + + if (context_.flags & use_scale_shift) + context_.weights_mem->set_data_handle(DummyData); + + if ((context_.pkind == prop_kind::forward_training) || + (context_.flags & use_global_stats)) { + 
context_.mean_mem->set_data_handle(DummyData); + context_.variance_mem->set_data_handle(DummyData); + } + return; + } + + memory::primitive_desc GetDstPd() const { + return (*context_.dst_mem).get_primitive_desc(); + } + + mkldnn_memory_format_t GetSrcFmt() const { + return (*context_.src_mem).get_primitive_desc().desc().data.format; + } + + mkldnn_memory_format_t GetDstFmt() const { + return (*context_.dst_mem).get_primitive_desc().desc().data.format; + } + + private: + // Primitive reuse context for BatchNorm fwd op + struct BatchNormFwdContext { + // flags indict if it is training or inference mode + int64 flags; + + // algorithm + mkldnn::prop_kind pkind; + + // Mkldnn Memory + std::shared_ptr src_mem; + std::shared_ptr weights_mem; + std::shared_ptr dst_mem; + std::shared_ptr mean_mem; + std::shared_ptr variance_mem; + + // BatchNorm forward primitive + std::shared_ptr bn_fwd; + std::shared_ptr fwd_stream; + std::vector fwd_primitives; + + BatchNormFwdContext() : + flags(0), pkind(mkldnn::forward_training), src_mem(nullptr), + weights_mem(nullptr), dst_mem(nullptr), mean_mem(nullptr), + variance_mem(nullptr), bn_fwd(nullptr), fwd_stream(nullptr) { + } + } context_; + + void Setup(const MklBatchNormFwdParams& fwdParams) { + context_.flags = fwdParams.training ? use_scale_shift + : (use_scale_shift | use_global_stats); + context_.pkind = fwdParams.training ? 
prop_kind::forward_training + : prop_kind::forward_scoring; + + // memory desc + auto src_md = memory::desc({fwdParams.src_dims}, + MklDnnType(), get_desired_format(fwdParams.src_dims[1])); + + // fwd desc & primitive desc + auto fwd_desc = batch_normalization_forward::desc( + context_.pkind, src_md, fwdParams.eps, context_.flags); + auto fwd_pd = batch_normalization_forward::primitive_desc( + fwd_desc, cpu_engine_); + + // memory primitive + context_.src_mem.reset(new memory({src_md, cpu_engine_}, DummyData)); + context_.dst_mem.reset(new memory(fwd_pd.dst_primitive_desc(), DummyData)); + + if (context_.flags & use_scale_shift) { + auto weights_desc = memory::desc({2, fwdParams.depth}, + MklDnnType(), memory::format::nc); + context_.weights_mem.reset(new memory({weights_desc, cpu_engine_}, + DummyData)); + } + + if (fwdParams.training || (context_.flags & use_global_stats)) { + auto mean_desc = memory::desc({1, fwdParams.depth}, + MklDnnType(), memory::format::nc); + context_.mean_mem.reset(new memory({mean_desc, cpu_engine_}, DummyData)); + + auto variance_desc = memory::desc({1, fwdParams.depth}, + MklDnnType(), memory::nc); + context_.variance_mem.reset(new memory({variance_desc, cpu_engine_}, + DummyData)); + } + + // BatchNorm forward primitive + if (!fwdParams.training && !(context_.flags & use_global_stats)) { + if ((context_.flags & use_scale_shift) && mkldnn_use_scaleshift) { + context_.bn_fwd.reset(new batch_normalization_forward(fwd_pd, + *context_.src_mem, *context_.weights_mem, *context_.dst_mem)); + } else { + context_.bn_fwd.reset(new batch_normalization_forward( + fwd_pd, *context_.src_mem, *context_.dst_mem)); + } + } else if (context_.flags & use_global_stats) { + if ((context_.flags & use_scale_shift) && mkldnn_use_scaleshift) { + context_.bn_fwd.reset(new batch_normalization_forward( + fwd_pd, *context_.src_mem, (const primitive::at)*context_.mean_mem, + (const primitive::at)*context_.variance_mem, *context_.weights_mem, + *context_.dst_mem)); 
+ } else { + context_.bn_fwd.reset(new batch_normalization_forward( + fwd_pd, *context_.src_mem, (const primitive::at)*context_.mean_mem, + (const primitive::at)*context_.variance_mem, *context_.dst_mem)); + } + } else { + if ((context_.flags & use_scale_shift) && mkldnn_use_scaleshift) { + context_.bn_fwd.reset(new batch_normalization_forward( + fwd_pd, *context_.src_mem, *context_.weights_mem, *context_.dst_mem, + *context_.mean_mem, *context_.variance_mem)); + } else { + context_.bn_fwd.reset(new batch_normalization_forward( + fwd_pd, *context_.src_mem, *context_.dst_mem, + *context_.mean_mem, *context_.variance_mem)); + } + } + + context_.fwd_primitives.push_back(*context_.bn_fwd); + return; + } + + mkldnn::memory::desc get_desc_data(const mkldnn::memory &m) const { + return m.get_primitive_desc().desc().data; + } + + engine cpu_engine_ = engine(engine::cpu, 0); +}; + +template +class MklFusedBatchNormFwdPrimitiveFactory : public MklPrimitiveFactory { + public: + static MklFusedBatchNormFwdPrimitive* Get( + const MklBatchNormFwdParams& fwdParams) { + auto bn_fwd = static_cast*>( + MklFusedBatchNormFwdPrimitiveFactory + ::GetInstance().GetBatchNormFwd(fwdParams)); + + if (bn_fwd == nullptr) { + bn_fwd = new MklFusedBatchNormFwdPrimitive(fwdParams); + MklFusedBatchNormFwdPrimitiveFactory::GetInstance().SetBatchNormFwd( + fwdParams, bn_fwd); + } + return bn_fwd; + } + + static MklFusedBatchNormFwdPrimitiveFactory & GetInstance() { + static MklFusedBatchNormFwdPrimitiveFactory instance_; + return instance_; + } + + private: + MklFusedBatchNormFwdPrimitiveFactory() {} + ~MklFusedBatchNormFwdPrimitiveFactory() {} + + static std::string CreateKey(const MklBatchNormFwdParams& fwdParams) { + std::string prefix = "bn_fwd"; + FactoryKeyCreator key_creator; + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(fwdParams.src_dims); + key_creator.AddAsKey(fwdParams.depth); + key_creator.AddAsKey(fwdParams.eps); + key_creator.AddAsKey(fwdParams.training); + return 
key_creator.GetKey(); + } + + MklPrimitive* GetBatchNormFwd(const MklBatchNormFwdParams& fwdParams) { + std::string key = CreateKey(fwdParams); + return this->GetOp(key); + } + + void SetBatchNormFwd(const MklBatchNormFwdParams& fwdParams, + MklPrimitive *op) { + std::string key = CreateKey(fwdParams); + this->SetOp(key, op); + } +}; + +struct MklBatchNormBwdParams { + memory::dims src_dims; + memory::dims diff_dst_dims; + int depth; + float eps; + bool training; + + MklBatchNormBwdParams(memory::dims src_dims, memory::dims diff_dst_dims, + int depth, float eps, bool training) : src_dims(src_dims), + diff_dst_dims(diff_dst_dims), depth(depth), eps(eps), + training(training) { + } +}; + + +template +class MklFusedBatchNormBwdPrimitive : public MklPrimitive { + public: + explicit MklFusedBatchNormBwdPrimitive( + const MklBatchNormBwdParams& bwdParams) { + context_.bwd_stream.reset( + new mkldnn::stream(mkldnn::stream::kind::eager)); + if (context_.bn_bwd == nullptr) + Setup(bwdParams); + } + + ~MklFusedBatchNormBwdPrimitive() {} + + // BatchNormalization backward execute + // src_data: input data buffer of src + // mean_data: input data buffer of mean + // variance_data: input data buffer of variance + // diff_dst_data: input data buffer of diff_dst + // weights_data: input data buffer of weights + // diff_src_data: output data buffer of diff_src + // diff_weights_data: output data buffer of diff_weights + void Execute(const T* src_data, const T* mean_data, const T* variance_data, + const T* diff_dst_data, const T* weights_data, + const T* diff_src_data, const T* diff_weights_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.mean_mem->set_data_handle( + static_cast(const_cast(mean_data))); + context_.variance_mem->set_data_handle( + static_cast(const_cast(variance_data))); + context_.diff_dst_mem->set_data_handle( + static_cast(const_cast(diff_dst_data))); + + if (context_.flags & use_scale_shift) { + 
context_.weights_mem->set_data_handle( + static_cast(const_cast(weights_data))); + context_.diff_weights_mem->set_data_handle( + static_cast(const_cast(diff_weights_data))); + } + + context_.diff_src_mem->set_data_handle( + static_cast(const_cast(diff_src_data))); + + // execution + context_.bwd_stream->submit(context_.bwd_primitives); + + context_.src_mem->set_data_handle(DummyData); + context_.mean_mem->set_data_handle(DummyData); + context_.variance_mem->set_data_handle(DummyData); + context_.diff_dst_mem->set_data_handle(DummyData); + if (context_.flags & use_scale_shift) { + context_.weights_mem->set_data_handle(DummyData); + context_.diff_weights_mem->set_data_handle(DummyData); + } + context_.diff_src_mem->set_data_handle(DummyData); + return; + } + + mkldnn_memory_format_t GetSrcFmt() { + return(*context_.src_mem).get_primitive_desc().desc().data.format; + } + + mkldnn_memory_format_t GetDiffDstFmt() { + return(*context_.diff_dst_mem).get_primitive_desc().desc().data.format; + } + + memory::primitive_desc GetDiffSrcPd() { + return(*context_.diff_src_mem).get_primitive_desc(); + } + + private: + struct BatchNormBwdContext { + // Flags to indicate whether it is training or inference + int64 flags; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr mean_mem; + std::shared_ptr variance_mem; + std::shared_ptr diff_dst_mem; + std::shared_ptr weights_mem; + std::shared_ptr diff_weights_mem; + std::shared_ptr diff_src_mem; + + // Batch Norm primitive + std::shared_ptr bn_bwd; + std::vector bwd_primitives; + std::shared_ptr bwd_stream; + + BatchNormBwdContext() : + src_mem(nullptr), mean_mem(nullptr), variance_mem(nullptr), + diff_dst_mem(nullptr), weights_mem(nullptr), diff_weights_mem(nullptr), + diff_src_mem(nullptr), bwd_stream(nullptr) { + } + } context_; + void Setup(const MklBatchNormBwdParams& bwdParams) { + context_.flags = bwdParams.training ? 
use_scale_shift + : (use_scale_shift | use_global_stats); + + // memory desc + auto src_md = memory::desc({bwdParams.src_dims}, + MklDnnType(), get_desired_format(bwdParams.src_dims[1])); + auto diff_dst_md = memory::desc({bwdParams.diff_dst_dims}, + MklDnnType(), get_desired_format(bwdParams.diff_dst_dims[1])); + auto variance_desc = memory::desc({1, bwdParams.depth}, MklDnnType(), + memory::nc); + auto mean_desc = memory::desc({1, bwdParams.depth}, + MklDnnType(), memory::format::nc); + auto weights_desc = memory::desc({2, bwdParams.depth}, + MklDnnType(), memory::format::nc); + auto diff_weights_desc = weights_desc; + + // fwd desc & primitive desc + auto fwd_desc = batch_normalization_forward::desc( + prop_kind::forward_training, src_md, bwdParams.eps, + bwdParams.training + ? use_scale_shift + : (use_scale_shift | use_global_stats)); + auto fwd_pd = batch_normalization_forward::primitive_desc( + fwd_desc, cpu_engine_); + + // BatchNorm backward primtive + // + // For inference, specify use_global_stats + // 1. on fwd propagation, use mean and variance provided as inputs. + // 2. on bwd propagation, mean and variance are considered as constants. + // Thus, reduce the amount of MKL computation. + auto bwd_desc = batch_normalization_backward::desc( + prop_kind::backward, diff_dst_md, src_md, bwdParams.eps, + bwdParams.training ? 
use_scale_shift + : (use_scale_shift | use_global_stats)); + auto bn_bwd_pd = batch_normalization_backward::primitive_desc( + bwd_desc, cpu_engine_, fwd_pd); + + // memory primitive + context_.src_mem.reset(new memory({src_md, cpu_engine_}, DummyData)); + context_.diff_dst_mem.reset(new memory({diff_dst_md, cpu_engine_}, + DummyData)); + context_.variance_mem.reset(new memory({variance_desc, cpu_engine_}, + DummyData)); + context_.mean_mem.reset(new memory({mean_desc, cpu_engine_}, DummyData)); + context_.weights_mem.reset(new memory({weights_desc, cpu_engine_}, + DummyData)); + context_.diff_weights_mem.reset(new memory({diff_weights_desc, cpu_engine_}, + DummyData)); + context_.diff_src_mem.reset(new memory({src_md, cpu_engine_}, DummyData)); + + context_.bn_bwd.reset(new batch_normalization_backward( + bn_bwd_pd, *context_.src_mem, *context_.mean_mem, + *context_.variance_mem, *context_.diff_dst_mem, *context_.weights_mem, + *context_.diff_src_mem, *context_.diff_weights_mem)); + context_.bwd_primitives.push_back(*context_.bn_bwd); + return; + } + + engine cpu_engine_ = engine(engine::cpu, 0); +}; + +template +class MklFusedBatchNormBwdPrimitiveFactory : public MklPrimitiveFactory { + public: + static MklFusedBatchNormBwdPrimitive* Get( + const MklBatchNormBwdParams& bwdParams) { + auto bn_bwd = static_cast*>( + MklFusedBatchNormBwdPrimitiveFactory + ::GetInstance().GetBatchNormBwd(bwdParams)); + if (bn_bwd == nullptr) { + bn_bwd = new MklFusedBatchNormBwdPrimitive(bwdParams); + MklFusedBatchNormBwdPrimitiveFactory::GetInstance().SetBatchNormBwd( + bwdParams, bn_bwd); + } + return bn_bwd; + } + + static MklFusedBatchNormBwdPrimitiveFactory& GetInstance() { + static MklFusedBatchNormBwdPrimitiveFactory instance_; + return instance_; + } + + private: + MklFusedBatchNormBwdPrimitiveFactory() {} + ~MklFusedBatchNormBwdPrimitiveFactory() {} + + static std::string CreateKey(const MklBatchNormBwdParams& bwdParams) { + std::string prefix = "bn_bwd"; + FactoryKeyCreator 
key_creator; + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(bwdParams.src_dims); + key_creator.AddAsKey(bwdParams.diff_dst_dims); + key_creator.AddAsKey(bwdParams.depth); + key_creator.AddAsKey(bwdParams.eps); + key_creator.AddAsKey(bwdParams.training); + return key_creator.GetKey(); + } + + MklPrimitive* GetBatchNormBwd(const MklBatchNormBwdParams& bwdParams) { + std::string key = CreateKey(bwdParams); + return this->GetOp(key); + } + + void SetBatchNormBwd(const MklBatchNormBwdParams& bwdParams, + MklPrimitive* op) { + std::string key = CreateKey(bwdParams); + this->SetOp(key, op); + } +}; + template class MklFusedBatchNormOp : public OpKernel { public: @@ -701,7 +1162,6 @@ class MklFusedBatchNormOp : public OpKernel { void Compute(OpKernelContext* context) override { try { - auto cpu_engine = engine(engine::cpu, 0); const size_t kSrcIndex = 0; // index of src input tensor const size_t kScaleIndex = 1; // index of scale tensor const size_t kShiftIndex = 2; // index of shift tensor @@ -786,7 +1246,7 @@ class MklFusedBatchNormOp : public OpKernel { SetMeanVariance(est_mean_tensor, est_variance_tensor); MklDnnData src(&cpu_engine); - MklDnnData dst(&cpu_engine); + MklDnnData weights(&cpu_engine); memory::format format_m; if (dnn_shape_src.IsMklTensor()) { @@ -800,123 +1260,108 @@ class MklFusedBatchNormOp : public OpKernel { } // set src primitive - memory::dims src_dims; - if (dnn_shape_src.IsMklTensor()) { - src_dims = TFShapeToMklDnnDimsInNCHW(dnn_shape_src.GetTfShape(), - tensor_format_); - } else { - src_dims = - TFShapeToMklDnnDimsInNCHW(src_tensor.shape(), tensor_format_); - } + memory::dims src_dims = dnn_shape_src.IsMklTensor() + ? dnn_shape_src.GetSizesAsMklDnnDims() + : TFShapeToMklDnnDimsInNCHW(src_tensor.shape(), tensor_format_); auto src_md = dnn_shape_src.IsMklTensor() ? 
dnn_shape_src.GetMklLayout() : memory::desc(src_dims, MklDnnType(), format_m); - src.SetUsrMem(src_md, &src_tensor); - // set weights primitive // MKL-DNN packs scale & shift as "weights": // ...... - auto weights_desc = memory::desc({2, static_cast(depth_)}, - MklDnnType(), memory::format::nc); - auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine); - auto weights_m = memory(weights_pd); - T* weights_data = reinterpret_cast(weights_m.get_data_handle()); + weights.AllocateBuffer(2 * depth_ * sizeof (T)); + T* weights_data = reinterpret_cast(weights.GetAllocatedBuffer()); T* scale_tf = reinterpret_cast(const_cast(scale_tensor.flat().data())); T* shift_tf = reinterpret_cast(const_cast(shift_tensor.flat().data())); - for (int k = 0; k < depth_; k++) { - weights_data[k] = scale_tf[k]; - weights_data[k + depth_] = shift_tf[k]; - } - - // set mean primitive - auto mean_desc = memory::desc({1, static_cast(depth_)}, - MklDnnType(), memory::format::nc); - auto mean_pd = memory::primitive_desc(mean_desc, cpu_engine); + std::memcpy(weights_data, scale_tf, depth_ * sizeof(T)); + std::memcpy(weights_data + depth_, shift_tf, depth_ * sizeof(T)); char* saved_mean_data_tf = reinterpret_cast(saved_mean_tensor->flat().data()); std::memcpy(saved_mean_data_tf, reinterpret_cast(mean_values_), depth_ * sizeof(T)); - auto mean_m = - memory(mean_pd, reinterpret_cast(saved_mean_data_tf)); - // set variance primitive - auto variance_desc = memory::desc({1, static_cast(depth_)}, - MklDnnType(), memory::format::nc); - auto variance_pd = memory::primitive_desc(variance_desc, cpu_engine); char* saved_variance_data_tf = reinterpret_cast(saved_variance_tensor->flat().data()); std::memcpy(saved_variance_data_tf, reinterpret_cast(variance_values_), depth_ * sizeof(T)); - auto variance_m = memory(variance_pd, saved_variance_data_tf); - - prop_kind pk = (is_training_) ? 
prop_kind::forward_training - : prop_kind::forward_scoring; - auto bnrm_fwd_desc = batch_normalization_forward::desc( - pk, src.GetUsrMemDesc(), epsilon_, - is_training_ ? use_scale_shift - : (use_scale_shift | use_global_stats)); - auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc( - bnrm_fwd_desc, cpu_engine); - - // allocate dst tensor - MklDnnShape dnn_shape_dst; - TensorShape tf_shape_dst; - if (dnn_shape_src.IsMklTensor()) { - dnn_shape_dst.SetMklTensor(true); - auto dst_pd = bnrm_fwd_pd.dst_primitive_desc(); - dnn_shape_dst.SetMklLayout(&dst_pd); - dnn_shape_dst.SetElemType(MklDnnType()); - dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(), src_dims, - format_m); - tf_shape_dst.AddDim(dst_pd.get_size() / sizeof(T)); - } else { - dnn_shape_dst.SetMklTensor(false); - tf_shape_dst = src_tensor.shape(); - } - AllocateOutputSetMklShape(context, kDstIndex, &dst_tensor, tf_shape_dst, - dnn_shape_dst); - // Output of batchnorm has same shape as input. - dst.SetUsrMem(src_md, dst_tensor); + // get batchnorm op from the pool + MklBatchNormFwdParams fwdParams(src_dims, depth_, epsilon_, is_training_); + MklFusedBatchNormFwdPrimitive *bn_fwd = + MklFusedBatchNormFwdPrimitiveFactory::Get(fwdParams); - primitive bnrm_fwd_op; - if (is_training_) { - bnrm_fwd_op = - batch_normalization_forward(bnrm_fwd_pd, src.GetOpMem(), weights_m, - dst.GetOpMem(), mean_m, variance_m); + // check if reorder is needed for src, weights, mean, variance + std::vector net; + T* src_data = nullptr; + if (src_md.data.format != bn_fwd->GetSrcFmt()) { + src.SetUsrMem(src_md, &src_tensor); + auto src_target = memory::primitive_desc({{src_dims}, MklDnnType(), + static_cast(bn_fwd->GetSrcFmt())}, cpu_engine); + src.CheckReorderToOpMem(src_target, &net); + src_data = static_cast(src.GetOpMem().get_data_handle()); } else { - bnrm_fwd_op = batch_normalization_forward( - bnrm_fwd_pd, src.GetOpMem(), mean_m, variance_m, - (const primitive::at)weights_m, dst.GetOpMem()); + src_data = 
static_cast( + const_cast(src_tensor.flat().data())); } - std::vector net; - net.push_back(bnrm_fwd_op); stream(stream::kind::eager).submit(net).wait(); + // allocate output (dst) tensor; always set it as MKL-DNN layout + MklDnnShape dnn_shape_dst; + TensorShape tf_shape_dst; + dnn_shape_dst.SetMklTensor(true); + auto dst_pd = bn_fwd->GetDstPd(); + dnn_shape_dst.SetMklLayout(&dst_pd); + dnn_shape_dst.SetElemType(MklDnnType()); + auto ndims = dnn_shape_src.IsMklTensor() + ? dnn_shape_src.GetDimension() + : src_tensor.shape().dims(); + dnn_shape_dst.SetTfLayout(ndims, src_dims, format_m); + tf_shape_dst.AddDim(dst_pd.get_size() / sizeof(T)); + AllocateOutputSetMklShape(context, kDstIndex, &dst_tensor, + tf_shape_dst, dnn_shape_dst); + + T* weights_op_data = weights_data; + T* mean_op_data = reinterpret_cast( + saved_mean_tensor->flat().data()); + T* variance_op_data = reinterpret_cast( + saved_variance_tensor->flat().data()); + T* dst_data = static_cast(dst_tensor->flat().data()); + + // execution + bn_fwd->Execute(src_data, weights_op_data, dst_data, + mean_op_data, variance_op_data); + // copy batch_mean data T* batch_mean_data_tf = reinterpret_cast(batch_mean_tensor->flat().data()); std::memcpy(reinterpret_cast(batch_mean_data_tf), - reinterpret_cast(mean_m.get_data_handle()), + reinterpret_cast(saved_mean_data_tf), depth_ * sizeof(T)); + // TODO(yli135): OpMem is same as usr mem since + // since its format is hard-coded as nc when primitive is created. 
// copy batch_variance data with Bessel's correction - // if training mode is on float adjust_factor = 1.0; if (is_training_) { size_t orig_size = src_dims[0] * src_dims[2] * src_dims[3]; size_t adjust_size = orig_size - 1; adjust_factor = (static_cast(orig_size)) / adjust_size; } - for (int k = 0; k < depth_; k++) - batch_variance_tensor->flat().data()[k] = - (reinterpret_cast(variance_m.get_data_handle()))[k] * - adjust_factor; + + auto variance_data = reinterpret_cast(saved_variance_data_tf); + auto batch_variance_data = batch_variance_tensor->flat().data(); + if (is_training_) { + for (int k = 0; k < depth_; k++) { + batch_variance_data[k] = variance_data[k] * adjust_factor; + } + } else { + std::memcpy(batch_variance_data, variance_data, depth_ * sizeof(T)); + } } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + ", message: " + string(e.message) + ", in file " + @@ -933,7 +1378,8 @@ class MklFusedBatchNormOp : public OpKernel { bool is_training_; T* mean_values_; T* variance_values_; - int depth_; // batch normalization is done for per channel. + size_t depth_; // batch normalization is done for per channel. 
+ engine cpu_engine = engine(engine::cpu, 0); void ExtractParams(OpKernelContext* context) { const Tensor& input = MklGetInput(context, 0); @@ -990,8 +1436,10 @@ class MklFusedBatchNormOp : public OpKernel { tf_shape_scale, mkl_shape_batch_mean); CHECK_NOTNULL(*batch_mean_tensor); // set NAN mean value in case of empty input tensor - for (int k = 0; k < tf_shape_scale.num_elements(); k++) - (*batch_mean_tensor)->flat().data()[k] = NAN; + int num_elements = tf_shape_scale.num_elements(); + auto batch_mean_data = (*batch_mean_tensor)->flat().data(); + for (int k = 0; k < num_elements; k++) + batch_mean_data[k] = NAN; // allocate batch variance output tensor MklDnnShape mkl_shape_batch_variance; @@ -1001,8 +1449,9 @@ class MklFusedBatchNormOp : public OpKernel { mkl_shape_batch_variance); CHECK_NOTNULL(*batch_variance_tensor); // set NAN variance value in case of empty input tensor - for (int k = 0; k < tf_shape_scale.num_elements(); k++) - (*batch_variance_tensor)->flat().data()[k] = NAN; + auto batch_variance_data = (*batch_variance_tensor)->flat().data(); + for (int k = 0; k < num_elements; k++) + batch_variance_data[k] = NAN; // Mean and variance (without Bessel's correction) saved for backward // computation to serve as pre-computed mean and variance. 
@@ -1012,8 +1461,9 @@ class MklFusedBatchNormOp : public OpKernel { tf_shape_scale, mkl_shape_saved_mean); CHECK_NOTNULL(*saved_mean_tensor); // set NAN mean value in case of empty input tensor - for (int k = 0; k < tf_shape_scale.num_elements(); k++) - (*saved_mean_tensor)->flat().data()[k] = NAN; + auto saved_mean_data = (*saved_mean_tensor)->flat().data(); + for (int k = 0; k < num_elements; k++) + saved_mean_data[k] = NAN; MklDnnShape mkl_shape_saved_variance; mkl_shape_saved_variance.SetMklTensor(false); @@ -1022,8 +1472,9 @@ class MklFusedBatchNormOp : public OpKernel { mkl_shape_saved_variance); CHECK_NOTNULL(*saved_variance_tensor); // set NAN variance value in case of empty input tensor - for (int k = 0; k < tf_shape_scale.num_elements(); k++) - (*saved_variance_tensor)->flat().data()[k] = NAN; + auto saved_variance_data = (*saved_variance_tensor)->flat().data(); + for (int k = 0; k < num_elements; k++) + saved_variance_data[k] = NAN; } }; @@ -1044,24 +1495,24 @@ class MklFusedBatchNormGradOp : public OpKernel { void Compute(OpKernelContext* context) override { try { - auto cpu_engine = engine(engine::cpu, 0); const size_t kDiffDstIndex = 0; // index of diff_dst tensor const size_t kSrcIndex = 1; // index of src input tensor const size_t kScaleIndex = 2; // index of scale tensor const size_t kMeanIndex = 3; // index of saved_mean tensor const size_t kVarianceIndex = 4; // index of saved_variance tensor + const Tensor& diff_dst_tensor = MklGetInput(context, kDiffDstIndex); const Tensor& src_tensor = MklGetInput(context, kSrcIndex); const Tensor& scale_tensor = MklGetInput(context, kScaleIndex); const Tensor& saved_mean_tensor = MklGetInput(context, kMeanIndex); const Tensor& saved_variance_tensor = - MklGetInput(context, kVarianceIndex); + MklGetInput(context, kVarianceIndex); MklDnnShape dnn_shape_src, dnn_shape_diff_dst; GetMklShape(context, kSrcIndex, &dnn_shape_src); GetMklShape(context, kDiffDstIndex, &dnn_shape_diff_dst); - TensorShape tf_shape_src, 
tf_shape_diff_dst; + TensorShape tf_shape_src, tf_shape_diff_dst; if (dnn_shape_diff_dst.IsMklTensor()) { tf_shape_diff_dst = dnn_shape_diff_dst.GetTfShape(); OP_REQUIRES( @@ -1102,6 +1553,7 @@ class MklFusedBatchNormGradOp : public OpKernel { saved_variance_tensor.shape().DebugString())); Tensor* diff_src_tensor = nullptr; + // special case: input with 0 element and 0 batch size if (tf_shape_src.num_elements() == 0 || tf_shape_diff_dst.num_elements() == 0) { HandleEmptyInput(context, tf_shape_src, scale_tensor.shape(), @@ -1117,174 +1569,114 @@ class MklFusedBatchNormGradOp : public OpKernel { ExtractParams(context); } + memory::format format_m; + if (dnn_shape_src.IsMklTensor()) { + if (dnn_shape_src.IsTensorInNCHWFormat()) + format_m = memory::format::nchw; + else + format_m = memory::format::nhwc; + } else { + format_m = TFDataFormatToMklDnnDataFormat(tensor_format_); + } + MklDnnData src(&cpu_engine); - MklDnnData mean(&cpu_engine); - MklDnnData variance(&cpu_engine); MklDnnData diff_dst(&cpu_engine); - MklDnnData diff_src(&cpu_engine); + MklDnnData weights(&cpu_engine); + MklDnnData diff_weights(&cpu_engine); + + memory::dims src_dims = dnn_shape_src.IsMklTensor() + ? dnn_shape_src.GetSizesAsMklDnnDims() + : TFShapeToMklDnnDimsInNCHW(src_tensor.shape(), tensor_format_); + memory::dims diff_dst_dims = dnn_shape_diff_dst.IsMklTensor() + ? dnn_shape_diff_dst.GetSizesAsMklDnnDims() + : TFShapeToMklDnnDimsInNCHW(diff_dst_tensor.shape(), tensor_format_); + + // set src and diff_dst primitive descriptors + memory::desc src_md = dnn_shape_src.IsMklTensor() + ? dnn_shape_src.GetMklLayout() + : memory::desc(src_dims, MklDnnType(), format_m); + memory::desc diff_dst_md = dnn_shape_diff_dst.IsMklTensor() + ? 
dnn_shape_diff_dst.GetMklLayout() + : memory::desc(diff_dst_dims, MklDnnType(), format_m); + + // weights -- MKL DNN packs scales/ shifts as weights in order + // of scale, ..., scale, shift, ...., shift + weights.AllocateBuffer(2 * depth_ * sizeof(T)); + T* weights_data_tf = reinterpret_cast(weights.GetAllocatedBuffer()); + T* scale_tf = + reinterpret_cast(const_cast(scale_tensor.flat().data())); + for (int k = 0; k < depth_; k++) { + weights_data_tf[k] = scale_tf[k]; + weights_data_tf[k + depth_] = 0; + } - memory::dims src_dims, diff_dst_dims; - if (dnn_shape_src.IsMklTensor()) - src_dims = TFShapeToMklDnnDimsInNCHW(dnn_shape_src.GetTfShape(), - tensor_format_); - else - src_dims = - TFShapeToMklDnnDimsInNCHW(src_tensor.shape(), tensor_format_); + diff_weights.AllocateBuffer(2 * depth_ * sizeof(T)); - if (dnn_shape_diff_dst.IsMklTensor()) - diff_dst_dims = TFShapeToMklDnnDimsInNCHW( - dnn_shape_diff_dst.GetTfShape(), tensor_format_); - else - diff_dst_dims = - TFShapeToMklDnnDimsInNCHW(diff_dst_tensor.shape(), tensor_format_); + MklBatchNormBwdParams bwdParams(src_dims, diff_dst_dims, + depth_, epsilon_, is_training_); + MklFusedBatchNormBwdPrimitive *bn_bwd = + MklFusedBatchNormBwdPrimitiveFactory::Get(bwdParams); - // set src and diff_dst primitives according to input layout - memory::desc src_md({}, memory::data_undef, memory::format_undef); - memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef); - if (dnn_shape_src.IsMklTensor()) { - src_md = dnn_shape_src.GetMklLayout(); + // check if src/diff_dst need to be reordered + std::vector net; + T* src_data = nullptr; + if (src_md.data.format != bn_bwd->GetSrcFmt()) { + src.SetUsrMem(src_md, &src_tensor); + auto src_target = memory::primitive_desc({{src_dims}, MklDnnType(), + static_cast(bn_bwd->GetSrcFmt())}, cpu_engine); + src.CheckReorderToOpMem(src_target, &net); + src_data = static_cast(src.GetOpMem().get_data_handle()); } else { - src_md = memory::desc(src_dims, MklDnnType(), - 
TFDataFormatToMklDnnDataFormat(tensor_format_)); + src_data = static_cast(const_cast( + src_tensor.flat().data())); } - if (dnn_shape_diff_dst.IsMklTensor()) { - diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); + + T* diff_dst_data = nullptr; + if (diff_dst_md.data.format != bn_bwd->GetDiffDstFmt()) { + diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); + auto diff_dst_target = memory::primitive_desc({{diff_dst_dims}, + MklDnnType(), static_cast( + bn_bwd->GetDiffDstFmt())}, cpu_engine); + diff_dst.CheckReorderToOpMem(diff_dst_target, &net); + diff_dst_data = static_cast( + diff_dst.GetOpMem().get_data_handle()); } else { - diff_dst_md = memory::desc(diff_dst_dims, MklDnnType(), - TFDataFormatToMklDnnDataFormat(tensor_format_)); + diff_dst_data = static_cast(const_cast( + diff_dst_tensor.flat().data())); } - src.SetUsrMem(src_md, &src_tensor); - diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); - - // weights -- DNN packs scales/shifts as weights in order of - // scale, ..., scale, shift, ..., shift - auto weights_desc = - memory::desc({2, depth_}, MklDnnType(), memory::format::nc); - auto weights_pd = memory::primitive_desc(weights_desc, cpu_engine); - auto weights_m = memory(weights_pd); - T* weights_data = reinterpret_cast(weights_m.get_data_handle()); - T* scale_tf = - reinterpret_cast(const_cast(scale_tensor.flat().data())); - for (int k = 0; k < depth_; k++) { - weights_data[k] = scale_tf[k]; - weights_data[k + depth_] = 0; - } - - // set mean primitive - memory::dims mv_dims = GetMeanVarianceDims(); - mean.SetUsrMem(mv_dims, memory::format::nc, - const_cast(static_cast( - saved_mean_tensor.flat().data()))); - mean.SetOpMemDesc(mv_dims, memory::format::nc); - - // set variance primitive - variance.SetUsrMem(mv_dims, memory::format::nc, - const_cast(static_cast( - saved_variance_tensor.flat().data()))); - variance.SetOpMemDesc(mv_dims, memory::format::nc); - - // set diff_weight primitive - auto diff_weights_desc = - memory::desc({2, depth_}, MklDnnType(), 
memory::format::nc); - auto diff_weights_pd = - memory::primitive_desc(diff_weights_desc, cpu_engine); - auto diff_weights_m = memory(diff_weights_pd); - - auto bnrm_fwd_desc = batch_normalization_forward::desc( - prop_kind::forward_training, src.GetUsrMemDesc(), epsilon_, - is_training_ ? use_scale_shift - : (use_scale_shift | use_global_stats)); - auto bnrm_fwd_pd = batch_normalization_forward::primitive_desc( - bnrm_fwd_desc, cpu_engine); + stream(stream::kind::eager).submit(net).wait(); // Indices of output tensors const size_t kDiffSrcIndex = 0; // index of diff_src tensor - // allocate diff_src tensor + // allocate diff_src tensor, always set as MKL-DNN layout MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - - // MKL-DNN's BN primitive not provide API to fetch internal format - // set common_md as OpMem - // src and diff_dst will reorder to common_md - // diff_src will set as common_md - memory::desc common_md({}, memory::data_undef, memory::format_undef); - if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) { - if (dnn_shape_src.IsMklTensor()) { - common_md = dnn_shape_src.GetMklLayout(); - } else { - common_md = dnn_shape_diff_dst.GetMklLayout(); - } - } else { - common_md = memory::desc(src_dims, MklDnnType(), - TFDataFormatToMklDnnDataFormat(tensor_format_)); - } - // if any of src and diff_dst as mkl layout, - // then we set diff_src as mkl layout - if (dnn_shape_src.IsMklTensor() || - dnn_shape_diff_dst.IsMklTensor()) { - dnn_shape_diff_src.SetMklTensor(true); - // set diff_src's mkl layout as common_md - auto diff_src_pd = memory::primitive_desc(common_md, cpu_engine); - dnn_shape_diff_src.SetMklLayout(&diff_src_pd); - dnn_shape_diff_src.SetElemType(MklDnnType()); - if (dnn_shape_src.IsMklTensor()) { - dnn_shape_diff_src.SetTfLayout( - dnn_shape_src.GetDimension(), - src_dims, - dnn_shape_src.GetTfDataFormat()); - dnn_shape_diff_src.SetTfDimOrder( - dnn_shape_src.GetDimension(), - tensor_format_); - } else { - 
dnn_shape_diff_src.SetTfLayout( - dnn_shape_diff_dst.GetDimension(), - src_dims, - dnn_shape_diff_dst.GetTfDataFormat()); - dnn_shape_diff_src.SetTfDimOrder( - dnn_shape_diff_dst.GetDimension(), - tensor_format_); - } - tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); - } else { - dnn_shape_diff_src.SetMklTensor(false); - // both src and diff_dst are TensorFlow layout, - // so it is OK to get TensorFlow shape. - tf_shape_diff_src = src_tensor.shape(); - } + dnn_shape_diff_src.SetMklTensor(true); + auto diff_src_pd = bn_bwd->GetDiffSrcPd(); + dnn_shape_diff_src.SetMklLayout(&diff_src_pd); + dnn_shape_diff_src.SetElemType(MklDnnType()); + dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), src_dims, + format_m); + dnn_shape_diff_src.SetTfDimOrder(dnn_shape_src.GetDimension(), + tensor_format_); + tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); AllocateOutputSetMklShape(context, kDiffSrcIndex, &diff_src_tensor, tf_shape_diff_src, dnn_shape_diff_src); - // set diff_src - diff_src.SetUsrMem(common_md, diff_src_tensor); - - prop_kind pk = prop_kind::backward; - auto bnrm_bwd_desc = batch_normalization_backward::desc( - pk, common_md, common_md, epsilon_, - /* for inference, specify use_global_stats - 1. on fwd prop, use mean and variance - provided as inputs - 2. on bwd prop, mean and variance are - considered as constants. Thus, - reduce the amout of MKL computations - */ - is_training_ ? 
use_scale_shift - : (use_scale_shift | use_global_stats)); - auto bnrm_bwd_pd = batch_normalization_backward::primitive_desc( - bnrm_bwd_desc, cpu_engine, bnrm_fwd_pd); - - std::vector net; - src.CheckReorderToOpMem(memory::primitive_desc(common_md, - cpu_engine), &net); - diff_dst.CheckReorderToOpMem(memory::primitive_desc(common_md, - cpu_engine), &net); - - auto bnrm_bwd_op = batch_normalization_backward( - bnrm_bwd_pd, src.GetOpMem(), mean.GetOpMem(), variance.GetOpMem(), - diff_dst.GetOpMem(), weights_m, diff_src.GetOpMem(), diff_weights_m); - net.push_back(bnrm_bwd_op); - stream(stream::kind::eager).submit(net).wait(); + T* mean_data = static_cast(const_cast( + saved_mean_tensor.flat().data())); + T* variance_data = static_cast(const_cast( + saved_variance_tensor.flat().data())); + T* weights_data = weights_data_tf; + T* diff_src_data = static_cast( + diff_src_tensor->flat().data()); + T* diff_weights_data = static_cast( + diff_weights.GetAllocatedBuffer()); + // Execute + bn_bwd->Execute(src_data, mean_data, variance_data, diff_dst_data, + weights_data, diff_src_data, diff_weights_data); // allocate 4 output TF tensors Tensor* diff_scale_tensor = nullptr; @@ -1293,13 +1685,14 @@ class MklFusedBatchNormGradOp : public OpKernel { &diff_shift_tensor); // copy data: diff_scale and diff_shift - T* diff_weights_data_dnn = - reinterpret_cast(diff_weights_m.get_data_handle()); - for (int i = 0; i < depth_; i++) { - diff_scale_tensor->flat().data()[i] = diff_weights_data_dnn[i]; - diff_shift_tensor->flat().data()[i] = - diff_weights_data_dnn[i + depth_]; - } + auto diff_scale_data = diff_scale_tensor->flat().data(); + auto diff_shift_data = diff_shift_tensor->flat().data(); + std::memcpy(reinterpret_cast(diff_scale_data), + reinterpret_cast(diff_weights_data), + depth_ * sizeof(T)); + std::memcpy(reinterpret_cast(diff_shift_data), + reinterpret_cast(diff_weights_data) + depth_, + depth_ * sizeof(T)); } catch (mkldnn::error& e) { string error_msg = "Status: " + 
std::to_string(e.status) + ", message: " + string(e.message) + ", in file " + @@ -1315,6 +1708,7 @@ class MklFusedBatchNormGradOp : public OpKernel { TensorFormat tensor_format_; int depth_; // batch normalization is done for per channel. bool is_training_; + engine cpu_engine = engine(engine::cpu, 0); void ExtractParams(OpKernelContext* context) { const Tensor& input = MklGetInput(context, 0); @@ -1330,8 +1724,10 @@ class MklFusedBatchNormGradOp : public OpKernel { dnn_shape_diff_src.SetMklTensor(false); AllocateOutputSetMklShape(context, kDiffSrcIndex, diff_src_tensor, tf_shape_src, dnn_shape_diff_src); - for (size_t i = 0; i < (*diff_src_tensor)->shape().num_elements(); i++) - (*diff_src_tensor)->flat().data()[i] = 0; + int num_elements = (*diff_src_tensor)->shape().num_elements(); + auto diff_src_data = (*diff_src_tensor)->flat().data(); + for (size_t i = 0; i < num_elements; i++) + diff_src_data[i] = 0; Tensor* diff_scale_tensor = nullptr; Tensor* diff_shift_tensor = nullptr; @@ -1357,16 +1753,20 @@ class MklFusedBatchNormGradOp : public OpKernel { AllocateOutputSetMklShape(context, kDiffScaleIndex, diff_scale_tensor, tf_shape_scale_shift, mkl_shape_diff_scale); CHECK_NOTNULL(*diff_scale_tensor); - for (size_t i = 0; i < (*diff_scale_tensor)->shape().num_elements(); i++) - (*diff_scale_tensor)->flat().data()[i] = 0; + int diff_scale_num_elements = (*diff_scale_tensor)->shape().num_elements(); + auto diff_scale_data = (*diff_scale_tensor)->flat().data(); + for (size_t i = 0; i < diff_scale_num_elements; i++) + diff_scale_data[i] = 0; MklDnnShape mkl_shape_diff_shift; mkl_shape_diff_shift.SetMklTensor(false); AllocateOutputSetMklShape(context, kDiffShiftIndex, diff_shift_tensor, tf_shape_scale_shift, mkl_shape_diff_shift); CHECK_NOTNULL(*diff_shift_tensor); - for (size_t i = 0; i < (*diff_shift_tensor)->shape().num_elements(); i++) - (*diff_shift_tensor)->flat().data()[i] = 0; + int diff_shift_num_elements = (*diff_shift_tensor)->shape().num_elements(); + auto 
diff_shift_data = (*diff_shift_tensor)->flat().data(); + for (size_t i = 0; i < diff_shift_num_elements; i++) + diff_shift_data[i] = 0; // Placeholders for estimated_mean and estimated_variance, which are // used for inference and thus not needed here for gradient computation. diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 230b4278ca..5dd2ee4521 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -36,7 +36,7 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" - +#include "tensorflow/core/platform/cpu_info.h" #ifndef INTEL_MKL_ML #include "mkldnn.hpp" @@ -1482,7 +1482,8 @@ class MklDnnData { /// Operations memory descriptor memory::desc* op_md_; - + /// Operations temp buffer + void* allocated_buffer_; /// CPU engine on which operation will be executed const engine* cpu_engine_; @@ -1491,6 +1492,7 @@ class MklDnnData { : user_memory_(nullptr), reorder_memory_(nullptr), op_md_(nullptr), + allocated_buffer_(nullptr), cpu_engine_(e) {} ~MklDnnData() { @@ -1631,6 +1633,14 @@ class MklDnnData { user_memory_->set_data_handle(GetTensorBuffer(tensor)); } + /// allocate function for data buffer + inline void AllocateBuffer(size_t size) { + allocated_buffer_ = cpu_allocator()->AllocateRaw(64, size); + } + inline void* GetAllocatedBuffer() { + return allocated_buffer_; + } + /// Get the memory primitive for input and output of an op. If inputs /// to an op require reorders, then this function returns memory primitive /// for reorder. Otherwise, it will return memory primitive for user memory. @@ -1794,11 +1804,11 @@ class MklDnnData { } }; -/// Base class for operations with reuse of DNN primitives +/// Base class for operations with reuse of primitives /// -class DnnOp { +class MklPrimitive { public: - virtual ~DnnOp() {} + virtual ~MklPrimitive() {} // Dummy data. 
Its size, hard-coded as 256 here, does // not matter since MKL should never operate on this buffer. @@ -1808,31 +1818,31 @@ class DnnOp { const mkldnn::memory::dims NONE_DIMS = {}; // This constant is used to declare dummy buffer (size), for MKL primitives template -class DnnOpFactory { +class MklPrimitiveFactory { public: - DnnOpFactory() {} - ~DnnOpFactory() {} + MklPrimitiveFactory() {} + ~MklPrimitiveFactory() {} - DnnOp* GetOp(const std::string& key) { - auto stream_iter = DnnOpFactory::GetHashMap().find(key); - if (stream_iter == DnnOpFactory::GetHashMap().end()) { + MklPrimitive* GetOp(const std::string& key) { + auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); + if (stream_iter == MklPrimitiveFactory::GetHashMap().end()) { return nullptr; } else { return stream_iter->second; } } - void SetOp(const std::string& key, DnnOp* op) { - auto stream_iter = DnnOpFactory::GetHashMap().find(key); + void SetOp(const std::string& key, MklPrimitive* op) { + auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); - CHECK(stream_iter == DnnOpFactory::GetHashMap().end()); + CHECK(stream_iter == MklPrimitiveFactory::GetHashMap().end()); - DnnOpFactory::GetHashMap()[key] = op; + MklPrimitiveFactory::GetHashMap()[key] = op; } private: - static inline std::unordered_map &GetHashMap() { - static thread_local std::unordered_map map_; + static inline std::unordered_map &GetHashMap() { + static thread_local std::unordered_map map_; return map_; } }; @@ -1877,6 +1887,21 @@ class FactoryKeyCreator { } }; +static inline memory::format get_desired_format(int channel) { + memory::format fmt_desired = memory::format::any; + + if (port::TestCPUFeature(port::CPUFeature::AVX512F) + && (channel % 16) == 0) { + fmt_desired = memory::format::nChw16c; + } else if (port::TestCPUFeature(port::CPUFeature::AVX2) + && (channel % 8) == 0) { + fmt_desired = memory::format::nChw8c; + } else { + fmt_desired = memory::format::nchw; + } + return fmt_desired; +} + #endif // 
INTEL_MKL_DNN } // namespace tensorflow -- GitLab From f5e2edb5fc84179637355c727c4f0953764b48e5 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Fri, 18 May 2018 11:44:15 -0700 Subject: [PATCH 0018/2038] enhancement with pooling ops primitive reuse --- tensorflow/core/kernels/mkl_avgpooling_op.cc | 267 ++++++++------- tensorflow/core/kernels/mkl_conv_ops.cc | 280 +++++++++------- tensorflow/core/kernels/mkl_maxpooling_op.cc | 310 +++++++++-------- .../core/kernels/mkl_pooling_ops_common.cc | 193 ++++++++++- .../core/kernels/mkl_pooling_ops_common.h | 312 +++++++++++++++++- tensorflow/core/util/mkl_util.h | 59 +++- 6 files changed, 1023 insertions(+), 398 deletions(-) diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc index d545d34fdf..7cef2778bf 100644 --- a/tensorflow/core/kernels/mkl_avgpooling_op.cc +++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc @@ -442,22 +442,27 @@ class MklAvgPoolingOp : public MklPoolingForwardOpBase { void Compute(OpKernelContext* context) override { try { - auto cpu_engine = engine(engine::cpu, 0); - const Tensor& input_tensor = - MklGetInput(context, this->kInputTensorIndexInput); + const Tensor& input_tensor = MklGetInput(context, + this->kInputTensorIndexInput); MklDnnShape dnn_shape_input; GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input); this->SanityCheckInput(context, input_tensor, dnn_shape_input); if (!context->status().ok()) return; - MklDnnData dnn_data_input(&cpu_engine); - MklDnnData dnn_data_output(&cpu_engine); + MklDnnData dnn_data_input(&cpu_engine_); // initialize variables for the pooling op MklPoolParameters pool_params; // Get the input tensor and initialize the pooling parameters - this->ConfigureInput(context, dnn_shape_input, input_tensor, &pool_params, - &dnn_data_input); + TensorShape input_tensor_shape = input_tensor.shape(); + this->InitMklPoolParameters(context, &pool_params, + dnn_shape_input, input_tensor_shape); + // Get the 
input memory descriptor + memory::desc input_md = dnn_shape_input.IsMklTensor() + ? dnn_shape_input.GetMklLayout() + : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, + this->data_format_tf_), + MklDnnType(), this->data_format_mkldnn_); OP_REQUIRES_OK(context, context->status()); // Declare output tensor @@ -487,45 +492,58 @@ class MklAvgPoolingOp : public MklPoolingForwardOpBase { return; } - // If input is in Mkl layout, then just get the memory format from it - // directly, instead of using input data_format to AvgPool. - if (dnn_shape_input.IsMklTensor()) { - dnn_data_output.SetUsrMem( - output_dims_mkl_order, - static_cast( - dnn_data_input.GetUsrMemDesc().data.format)); + // Get src/filter/stride/padding information + memory::dims src_dims = dnn_shape_input.IsMklTensor() + ? dnn_shape_input.GetSizesAsMklDnnDims() + : TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), + this->data_format_tf_); + + memory::dims filter_dims = memory::dims({pool_params.window_rows, + pool_params.window_cols}); + memory::dims strides = memory::dims( + {pool_params.row_stride, pool_params.col_stride}); + memory::dims padding_left = memory::dims( + {static_cast(pool_params.pad_top), + static_cast(pool_params.pad_left)}); + memory::dims padding_right = memory::dims( + {static_cast(pool_params.pad_bottom), + static_cast(pool_params.pad_right)}); + + // Get an average pooling primitive from the op pool + MklPoolingFwdPrimitive *pooling_fwd = nullptr; + MklPoolingParams fwdParams(src_dims, output_dims_mkl_order, filter_dims, + strides, padding_left, padding_right, + algorithm::pooling_avg_exclude_padding); + pooling_fwd = MklPoolingFwdPrimitiveFactory::Get(fwdParams); + + // allocate output tensor + this->AllocateOutputTensor(context, *(pooling_fwd->GetPoolingFwdPd()), + output_dims_mkl_order, this->data_format_mkldnn_, &output_tensor); + CHECK_NOTNULL(output_tensor); + + OP_REQUIRES_OK(context, context->status()); + // check whether we need to reorder src + std::vector net; + 
T* src_data = nullptr; + if (input_md.data.format != pooling_fwd->GetSrcMemoryFormat()) { + dnn_data_input.SetUsrMem(input_md, &input_tensor); + auto src_target_primitive_desc = memory::primitive_desc({{src_dims}, + MklDnnType(), pooling_fwd->GetSrcMemoryFormat()}, cpu_engine_); + dnn_data_input.CheckReorderToOpMem(src_target_primitive_desc, &net); + src_data = static_cast( + dnn_data_input.GetOpMem().get_data_handle()); } else { - dnn_data_output.SetUsrMem(output_dims_mkl_order, - this->data_format_mkldnn_); + src_data = static_cast(const_cast( + input_tensor.flat().data())); } + stream(stream::kind::eager).submit(net).wait(); - // describe the memory layout - dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any); - - // 3. create a pooling primitive descriptor - auto pool_desc = pooling_forward::desc( - prop_kind::forward, algorithm::pooling_avg_exclude_padding, - dnn_data_input.GetUsrMemDesc(), dnn_data_output.GetUsrMemDesc(), - memory::dims({pool_params.row_stride, pool_params.col_stride}), - memory::dims({pool_params.window_rows, pool_params.window_cols}), - memory::dims({static_cast(pool_params.pad_top), - static_cast(pool_params.pad_left)}), - memory::dims({static_cast(pool_params.pad_bottom), - static_cast(pool_params.pad_right)}), - TFPaddingToMklDnnPadding(this->padding_)); - auto pool_prim_desc = - pooling_forward::primitive_desc(pool_desc, cpu_engine); - - this->AllocateOutputTensor(context, pool_prim_desc, output_dims_mkl_order, - this->data_format_mkldnn_, &output_tensor); - CHECK_NOTNULL(output_tensor); - - OP_REQUIRES_OK(context, context->status()); - dnn_data_output.SetUsrMemDataHandle(output_tensor); + T* dst_data = static_cast( + const_cast(output_tensor->flat().data())); - this->PrepareAndExecuteNet(pool_prim_desc, &dnn_data_input, - &dnn_data_output); + // execute pooling + pooling_fwd->Execute(src_data, dst_data); } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + ", message: " + 
string(e.message) + ", in file " + @@ -535,9 +553,10 @@ class MklAvgPoolingOp : public MklPoolingForwardOpBase { errors::Aborted("Operation received an exception:", error_msg)); } } // Compute -}; // MklAvgPoolingOp -//----------------------------------------------------------------------------- + private: + engine cpu_engine_ = engine(engine::cpu, 0); +}; // MklAvgPoolingOp template class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { @@ -547,91 +566,84 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { void Compute(OpKernelContext* context) override { try { - auto cpu_engine = engine(engine::cpu, 0); - MklDnnShape original_input_mkl_shape, input_gradient_mkl_shape; - const Tensor& tensor_in_shape = + const Tensor& orig_input_tensor = MklGetInput(context, kInputTensorIndexInputShape); - const Tensor& input_gradient_tensor = + const Tensor& grad_tensor = MklGetInput(context, kInputTensorIndexInputGradient); + + MklDnnShape orig_input_mkl_shape, grad_mkl_shape; GetMklShape(context, kInputTensorIndexInputShape, - &original_input_mkl_shape); + &orig_input_mkl_shape); GetMklShape(context, kInputTensorIndexInputGradient, - &input_gradient_mkl_shape); - - SanityCheckInputs(context, tensor_in_shape, input_gradient_tensor, - original_input_mkl_shape, input_gradient_mkl_shape); + &grad_mkl_shape); if (!context->status().ok()) return; // Used to allocate output_diff_src/diff_src - // and create pool_fwd mdm desc - // 0. Input("orig_input_shape: int32") //NOT a T Tensor! - // 1. 
Input("grad: T") - - MklDnnData input_gradient_diff_dst(&cpu_engine); - MklDnnData output_diff_src(&cpu_engine); - Tensor* output_tensor_diff_src = nullptr; - TensorShape original_input_shape; + MklDnnData grad_dnn_data(&cpu_engine_); MklPoolParameters pool_params; - memory::dims output_dims_mkl_order, original_input_dims_nchw; - // Configure the original input memory descriptor - memory::desc original_input_md = ConfigureOriginalInput( - context, tensor_in_shape, original_input_mkl_shape, - &original_input_dims_nchw, &pool_params, &original_input_shape); - - // configure the original output memory descriptor - // by definition, the shape of the original output is the same - // as the shape of the gradient diff_dst - memory::desc original_output_md = this->ConfigureOriginalOutput( - pool_params, input_gradient_mkl_shape, output_dims_mkl_order); - - memory::desc target_diff_dst_md = this->ConfigureInputGradient( - input_gradient_mkl_shape, input_gradient_tensor, - &input_gradient_diff_dst, original_output_md); - // The shape of the output diff src needs to be the same shape as the - // original input. But we will set its format to be same as the format of - // input gradient. We won't use format of original input since it will - // always be in Tensorflow layout (given that AvgPoolGrad gets shape of - // the input rather than actual input). 
- output_diff_src.SetUsrMem( - original_input_dims_nchw, - static_cast(target_diff_dst_md.data.format)); - - // Create the forward pooling primitive descriptor so we can reference it - // in the backward pooling primitive descriptor - auto pool_fwd_desc = pooling_forward::desc( - prop_kind::forward, algorithm::pooling_avg_exclude_padding, - original_input_md, original_output_md, - memory::dims({pool_params.row_stride, pool_params.col_stride}), - memory::dims({pool_params.window_rows, pool_params.window_cols}), - memory::dims({static_cast(pool_params.pad_top), - static_cast(pool_params.pad_left)}), - memory::dims({static_cast(pool_params.pad_bottom), - static_cast(pool_params.pad_right)}), - TFPaddingToMklDnnPadding(this->padding_)); - auto pool_fwd_prim_desc = - pooling_forward::primitive_desc(pool_fwd_desc, cpu_engine); - - auto pool_bkwd_desc = pooling_backward::desc( - algorithm::pooling_avg_exclude_padding, - output_diff_src.GetUsrMemDesc(), target_diff_dst_md, - memory::dims({pool_params.row_stride, pool_params.col_stride}), - memory::dims({pool_params.window_rows, pool_params.window_cols}), - memory::dims({static_cast(pool_params.pad_top), - static_cast(pool_params.pad_left)}), - memory::dims({static_cast(pool_params.pad_bottom), - static_cast(pool_params.pad_right)}), - TFPaddingToMklDnnPadding(this->padding_)); - auto pool_bkwd_prim_desc = pooling_backward::primitive_desc( - pool_bkwd_desc, cpu_engine, pool_fwd_prim_desc); - this->AllocateOutputTensor( - context, pool_bkwd_prim_desc, original_input_dims_nchw, - this->data_format_mkldnn_, &output_tensor_diff_src); - - output_diff_src.SetUsrMemDataHandle(output_tensor_diff_src); - - this->PrepareAndExecuteNet( - pool_bkwd_prim_desc, &input_gradient_diff_dst, &output_diff_src, - memory::primitive_desc(target_diff_dst_md, cpu_engine)); + auto shape_vec = orig_input_tensor.vec(); + TensorShape orig_input_shape; + for (int i = 0; i < orig_input_tensor.NumElements(); i++) { + orig_input_shape.AddDim(shape_vec(i)); 
+ } + this->InitMklPoolParameters(context, &pool_params, orig_input_mkl_shape, + orig_input_shape); + memory::dims filter_dims = memory::dims( + {pool_params.window_rows, pool_params.window_cols}); + memory::dims strides = memory::dims( + {pool_params.row_stride, pool_params.col_stride}); + memory::dims padding_left = memory::dims( + {static_cast(pool_params.pad_top), + static_cast(pool_params.pad_left)}); + memory::dims padding_right = memory::dims( + {static_cast(pool_params.pad_bottom), + static_cast(pool_params.pad_right)}); + memory::dims orig_input_dims_mkl_order = + orig_input_mkl_shape.IsMklTensor() + ? orig_input_mkl_shape.GetSizesAsMklDnnDims() + : TFShapeToMklDnnDimsInNCHW(orig_input_shape, this->data_format_tf_); + memory::dims diff_dst_dims = grad_mkl_shape.IsMklTensor() + ? grad_mkl_shape.GetSizesAsMklDnnDims() + : TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), + this->data_format_tf_); + memory::dims output_dims_mkl_order; + this->GetOutputDims(pool_params, &output_dims_mkl_order); + + MklPoolingParams bwdParams(orig_input_dims_mkl_order, + output_dims_mkl_order, filter_dims, strides, + padding_left, padding_right, algorithm::pooling_avg_exclude_padding); + MklPoolingBwdPrimitive *pooling_bwd = + MklPoolingBwdPrimitiveFactory::Get(bwdParams); + + Tensor* output_tensor = nullptr; + this->AllocateOutputTensor(context, *(pooling_bwd->GetPoolingBwdPd()), + orig_input_dims_mkl_order, + this->data_format_mkldnn_, &output_tensor); + // get diff_dst memory::desc + memory::desc diff_dst_md = grad_mkl_shape.IsMklTensor() + ? 
grad_mkl_shape.GetMklLayout() + : memory::desc(diff_dst_dims, MklDnnType(), + this->data_format_mkldnn_); + // Check whether we need to reorder diff_dst + T* diff_dst_data = nullptr; + std::vector net; + if (diff_dst_md.data.format != pooling_bwd->GetDiffDstFormat()) { + auto target_diff_dst = memory::primitive_desc({{diff_dst_dims}, + MklDnnType(), pooling_bwd->GetDiffDstFormat()}, cpu_engine_); + grad_dnn_data.SetUsrMem(diff_dst_md, &grad_tensor); + grad_dnn_data.CheckReorderToOpMem(target_diff_dst, &net); + diff_dst_data = static_cast( + grad_dnn_data.GetOpMem().get_data_handle()); + } else { + diff_dst_data = static_cast(const_cast( + grad_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); + T* diff_src_data = static_cast( + const_cast(output_tensor->flat().data())); + + // execute pooling op + pooling_bwd->Execute(diff_dst_data, diff_src_data); } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + ", message: " + string(e.message) + ", in file " + @@ -639,7 +651,7 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { OP_REQUIRES_OK(context, errors::Aborted("Compute received an exception:", error_msg)); } - } // Compute + } private: // 0. 
Input("orig_input_shape: int32") @@ -647,11 +659,14 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { const int kInputTensorIndexInputShape = 0; const int kInputTensorIndexInputGradient = 1; - memory::desc ConfigureOriginalInput( - OpKernelContext* context, const Tensor& tensor_original_input_shape, - const MklDnnShape& original_input_mkl_shape, - memory::dims* original_input_dims_mkl_order, - MklPoolParameters* pool_params, TensorShape* input_tensor_shape) { + engine cpu_engine_ = engine(engine::cpu, 0); + + memory::desc ConfigureOriginalInput(OpKernelContext* context, + const Tensor& tensor_original_input_shape, + const MklDnnShape& original_input_mkl_shape, + memory::dims* original_input_dims_mkl_order, + MklPoolParameters* pool_params, + TensorShape* input_tensor_shape) { CHECK_NOTNULL(original_input_dims_mkl_order); CHECK_NOTNULL(pool_params); CHECK_NOTNULL(input_tensor_shape); diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index f2b14f1278..c032add82e 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -59,7 +59,8 @@ namespace tensorflow { #ifndef INTEL_MKL_ML -struct ConvFwdDimensions { +// This structure aggregates multiple inputs to Conv2DFwd* methods. 
+struct MklConvFwdParams { memory::dims src_dims; memory::dims filter_dims; memory::dims bias_dims; @@ -69,7 +70,7 @@ struct ConvFwdDimensions { memory::dims padding_left; memory::dims padding_right; - ConvFwdDimensions(memory::dims src_dims, + MklConvFwdParams(memory::dims src_dims, memory::dims filter_dims, memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, @@ -82,35 +83,40 @@ struct ConvFwdDimensions { }; template -class Conv2DFwd : public DnnOp { +class MklConv2DFwdPrimitive: public MklPrimitive { public: - explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) { - fwd_stream_.reset(new stream(stream::kind::eager)); + explicit MklConv2DFwdPrimitive(const MklConvFwdParams& convFwdDims) { + context_.fwd_stream.reset(new stream(stream::kind::eager)); // create conv primitive - if (conv_fwd_ == nullptr) { + if (context_.conv_fwd == nullptr) { Setup(convFwdDims); } } - ~Conv2DFwd() {} + ~MklConv2DFwdPrimitive() {} // Convolution forward execute with bias // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // bias_data: input data buffer of bias // dst_data: output data buffer of dst - void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) { - src_mem_->set_data_handle(static_cast(src_data)); - filter_mem_->set_data_handle(static_cast(filter_data)); - bias_mem_->set_data_handle(static_cast(bias_data)); - dst_mem_->set_data_handle(static_cast(dst_data)); - fwd_stream_->submit(fwd_primitives_); + void Execute(const T* src_data, const T* filter_data, + const T* bias_data, const T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + context_.bias_mem->set_data_handle( + static_cast(const_cast(bias_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + 
context_.fwd_stream->submit(context_.fwd_primitives); // after exec, set data handle back - src_mem_->set_data_handle(DummyData); - filter_mem_->set_data_handle(DummyData); - bias_mem_->set_data_handle(DummyData); - dst_mem_->set_data_handle(DummyData); + context_.src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + context_.bias_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); return; } @@ -119,139 +125,174 @@ class Conv2DFwd : public DnnOp { // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // dst_data: output data buffer of dst - void Execute(T* src_data, T* filter_data, T* dst_data) { - src_mem_->set_data_handle(static_cast(src_data)); - filter_mem_->set_data_handle(static_cast(filter_data)); - dst_mem_->set_data_handle(static_cast(dst_data)); - fwd_stream_->submit(fwd_primitives_); - - // after exec, set data handle back - src_mem_->set_data_handle(DummyData); - filter_mem_->set_data_handle(DummyData); - dst_mem_->set_data_handle(DummyData); + void Execute(const T* src_data, const T* filter_data, + const T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + context_.fwd_stream->submit(context_.fwd_primitives); + + // after execution, set data handle back + context_.src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); return; } - // expected memory format for this primitive instance - memory::format src_fmt_; - memory::format filter_fmt_; + memory::format GetSrcMemoryFormat() const { + return context_.src_fmt; + } + + memory::format GetFilterMemoryFormat() const { + return context_.filter_fmt; + } - // convolution primitive - std::shared_ptr fwd_pd_; - std::shared_ptr conv_fwd_; + 
std::shared_ptr + GetPrimitiveDesc() const { + return context_.fwd_pd; + } private: - void Setup(const ConvFwdDimensions& convFwdDims) { + // Primitive reuse context for Conv2D Fwd op + struct ConvFwdContext { + // expected memory format for this primitive instance + memory::format src_fmt; + memory::format filter_fmt; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr filter_mem; + std::shared_ptr bias_mem; + std::shared_ptr dst_mem; + + // desc & prmitive desc + std::shared_ptr fwd_desc; + + // memory desc + std::shared_ptr src_md; + std::shared_ptr filter_md; + std::shared_ptr bias_md; + std::shared_ptr dst_md; + + // convolution primitive + std::shared_ptr fwd_pd; + std::shared_ptr conv_fwd; + + std::shared_ptr fwd_stream; + std::vector fwd_primitives; + + ConvFwdContext() : + src_fmt(memory::format::any), filter_fmt(memory::format::any), + src_mem(nullptr), filter_mem(nullptr), bias_mem(nullptr), + dst_mem(nullptr), fwd_desc(nullptr), + src_md(nullptr), filter_md(nullptr), bias_md(nullptr), + fwd_pd(nullptr), conv_fwd(nullptr), fwd_stream(nullptr) { + } + } context_; + + engine cpu_engine_ = engine(engine::cpu, 0); + + void Setup(const MklConvFwdParams& convFwdDims) { // create memory descriptors for convolution data w/ no specified format - src_md_.reset(new memory::desc({convFwdDims.src_dims}, + context_.src_md.reset(new memory::desc({convFwdDims.src_dims}, MklDnnType(), memory::format::any)); - filter_md_.reset(new memory::desc({convFwdDims.filter_dims}, + context_.filter_md.reset(new memory::desc({convFwdDims.filter_dims}, MklDnnType(), memory::format::any)); - dst_md_.reset(new memory::desc({convFwdDims.dst_dims}, + context_.dst_md.reset(new memory::desc({convFwdDims.dst_dims}, MklDnnType(), memory::format::any)); if (!convFwdDims.bias_dims.empty()) - bias_md_.reset(new memory::desc({convFwdDims.bias_dims}, + context_.bias_md.reset(new memory::desc({convFwdDims.bias_dims}, MklDnnType(), memory::format::any)); // create a convolution if 
(!convFwdDims.bias_dims.empty()) { - fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_, + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.src_md, *context_.filter_md, + *context_.bias_md, *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, convFwdDims.padding_right, padding_kind::zero)); } else { - fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *src_md_, *filter_md_, *dst_md_, - convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, - convFwdDims.padding_right, padding_kind::zero)); + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.src_md, *context_.filter_md, + *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, + convFwdDims.padding_left, convFwdDims.padding_right, + padding_kind::zero)); } - fwd_pd_.reset(new convolution_forward::primitive_desc( - *fwd_desc_, cpu_engine_)); + context_.fwd_pd.reset(new convolution_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); // store the expected memory format - src_fmt_ = static_cast( - fwd_pd_.get()->src_primitive_desc().desc().data.format); + context_.src_fmt = static_cast( + context_.fwd_pd.get()->src_primitive_desc().desc().data.format); - filter_fmt_ = static_cast( - fwd_pd_.get()->weights_primitive_desc().desc().data.format); + context_.filter_fmt = static_cast( + context_.fwd_pd.get()->weights_primitive_desc().desc().data.format); // create memory primitive based on dummy data - src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData)); - filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), - DummyData)); - dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData)); + context_.src_mem.reset(new memory( + context_.fwd_pd.get()->src_primitive_desc(), DummyData)); + 
context_.filter_mem.reset(new memory( + context_.fwd_pd.get()->weights_primitive_desc(), DummyData)); + context_.dst_mem.reset(new memory( + context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); // create convolution primitive and add it to net if (!convFwdDims.bias_dims.empty()) { - bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType(), - memory::format::x}, cpu_engine_}, DummyData)); - conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, - *filter_mem_, *bias_mem_, *dst_mem_)); + context_.bias_mem.reset(new memory({{{convFwdDims.bias_dims}, + MklDnnType(), memory::format::x}, cpu_engine_}, DummyData)); + context_.conv_fwd.reset(new convolution_forward( + *context_.fwd_pd, *context_.src_mem, *context_.filter_mem, + *context_.bias_mem, *context_.dst_mem)); } else { - conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, - *filter_mem_, *dst_mem_)); + context_.conv_fwd.reset(new convolution_forward( + *context_.fwd_pd, *context_.src_mem, + *context_.filter_mem, *context_.dst_mem)); } - fwd_primitives_.push_back(*conv_fwd_); + context_.fwd_primitives.push_back(*context_.conv_fwd); return; } - - // MKLDNN memory - std::shared_ptr src_mem_; - std::shared_ptr filter_mem_; - std::shared_ptr bias_mem_; - std::shared_ptr dst_mem_; - - std::shared_ptr fwd_stream_; - std::vector fwd_primitives_; - - // desc & prmitive desc - std::shared_ptr fwd_desc_; - - // memory desc - std::shared_ptr src_md_; - std::shared_ptr filter_md_; - std::shared_ptr bias_md_; - std::shared_ptr dst_md_; - - engine cpu_engine_ = engine(engine::cpu, 0); }; template -class Conv2DFwdFactory : public DnnOpFactory { +class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { public: - static Conv2DFwd* Get(const ConvFwdDimensions& convFwdDims) { - Conv2DFwd* conv2d_fwd = nullptr; + static MklConv2DFwdPrimitive* Get(const MklConvFwdParams& convFwdDims) { + MklConv2DFwdPrimitive* conv2d_fwd = nullptr; // try to find a suitable one in pool - conv2d_fwd = dynamic_cast*> 
( - Conv2DFwdFactory::GetInstance().GetConv2DFwd(convFwdDims)); + conv2d_fwd = dynamic_cast*> ( + MklConv2DFwdPrimitiveFactory::GetInstance().GetConv2DFwd( + convFwdDims)); if (conv2d_fwd == nullptr) { - conv2d_fwd = new Conv2DFwd(convFwdDims); - Conv2DFwdFactory::GetInstance().SetConv2DFwd( + conv2d_fwd = new MklConv2DFwdPrimitive(convFwdDims); + MklConv2DFwdPrimitiveFactory::GetInstance().SetConv2DFwd( convFwdDims, conv2d_fwd); } return conv2d_fwd; } private: - Conv2DFwdFactory() {} - ~Conv2DFwdFactory() {} + MklConv2DFwdPrimitiveFactory() {} + ~MklConv2DFwdPrimitiveFactory() {} static const int kDilationH = 0, kDilationW = 1; - static Conv2DFwdFactory& GetInstance() { - static Conv2DFwdFactory instance_; + static MklConv2DFwdPrimitiveFactory& GetInstance() { + static MklConv2DFwdPrimitiveFactory instance_; return instance_; } - static std::string CreateKey(const ConvFwdDimensions& convFwdDims) { + static std::string CreateKey(const MklConvFwdParams& convFwdDims) { std::string prefix = "conv2d_fwd_"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -266,12 +307,12 @@ class Conv2DFwdFactory : public DnnOpFactory { return key_creator.GetKey(); } - DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) { + MklPrimitive* GetConv2DFwd(const MklConvFwdParams& convFwdDims) { std::string key = CreateKey(convFwdDims); return this->GetOp(key); } - void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) { + void SetConv2DFwd(const MklConvFwdParams& convFwdDims, MklPrimitive *op) { std::string key = CreateKey(convFwdDims); this->SetOp(key, op); } @@ -762,7 +803,6 @@ class MklConv2DOp : public OpKernel { MklDnnData src(&cpu_engine); MklDnnData filter(&cpu_engine); - MklDnnData dst(&cpu_engine); // output memory::dims src_dims, filter_dims, padding_left, padding_right, dilations, strides; @@ -812,7 +852,6 @@ class MklConv2DOp : public OpKernel { auto src_md = src_mkl_shape.IsMklTensor() ? 
src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), tf_fmt); - src.SetUsrMem(src_md, &src_tensor); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). @@ -820,29 +859,28 @@ class MklConv2DOp : public OpKernel { ? filter_mkl_shape.GetMklLayout() : memory::desc(filter_dims, MklDnnType(), memory::format::hwio); - filter.SetUsrMem(filter_md, &filter_tensor); // MKLDNN dilation starts from 0. dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; // get a conv2d fwd from primitive pool - Conv2DFwd *conv2d_fwd = nullptr; + MklConv2DFwdPrimitive *conv2d_fwd = nullptr; if (biasEnabled) { memory::dims bias_dims = {}; conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); - ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims, + MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); + conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); } else { - ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS, + MklConvFwdParams convFwdDims(src_dims, filter_dims, NONE_DIMS, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); + conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); } // allocate output tensors output_tensor and filter_out_tensor std::shared_ptr - conv_fwd_pd = conv2d_fwd->fwd_pd_; + conv_fwd_pd = conv2d_fwd->GetPrimitiveDesc(); AllocateOutputTensor(context, *conv_fwd_pd, dst_dims_mkl_order, tf_fmt, &dst_tensor); Tensor* filter_out_tensor = nullptr; @@ -854,20 +892,30 @@ class MklConv2DOp : public OpKernel { // check whether src/filter need reorder std::vector net; - if (src_md.data.format != conv2d_fwd->src_fmt_) - src.CheckReorderToOpMem( - conv_fwd_pd.get()->src_primitive_desc(), &net); - - if (filter_md.data.format != conv2d_fwd->filter_fmt_) - 
filter.CheckReorderToOpMem( - conv_fwd_pd.get()->weights_primitive_desc(), - filter.GetTensorBuffer(filter_out_tensor), &net); + T *src_data = nullptr; + if (src_md.data.format != conv2d_fwd->GetSrcMemoryFormat()) { + src.SetUsrMem(src_md, &src_tensor); + src.CheckReorderToOpMem( + conv_fwd_pd.get()->src_primitive_desc(), &net); + src_data = static_cast(src.GetOpMem().get_data_handle()); + } else { + src_data = static_cast(const_cast( + src_tensor.flat().data())); + } + T *filter_data = nullptr; + if (filter_md.data.format != conv2d_fwd->GetFilterMemoryFormat()) { + filter.SetUsrMem(filter_md, &filter_tensor); + filter.CheckReorderToOpMem( + conv_fwd_pd.get()->weights_primitive_desc(), + filter.GetTensorBuffer(filter_out_tensor), &net); + filter_data = static_cast(filter.GetOpMem().get_data_handle()); + } else { + filter_data = static_cast(const_cast( + filter_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); - T* src_data = static_cast( - src.GetOpMem().get_data_handle()); - T* filter_data = static_cast( - filter.GetOpMem().get_data_handle()); // execute convolution if (biasEnabled) { diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc index ea537524b1..1726521539 100644 --- a/tensorflow/core/kernels/mkl_maxpooling_op.cc +++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc @@ -71,6 +71,7 @@ class MklMaxPoolingOp : public OpKernel { // attribute value. 
OP_REQUIRES_OK(context, context->GetAttr("workspace_enabled", &workspace_enabled_)); + } void Compute(OpKernelContext* context) override { @@ -119,6 +120,7 @@ class MklMaxPoolingOp : public OpKernel { mkl_out_shape); Tensor* workspace_tensor; + void* workspace_buf = nullptr; TensorShape workspace_shape; mkl_workspace_shape.SetMklTensor(false); @@ -510,9 +512,8 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase { void Compute(OpKernelContext* context) override { try { - auto cpu_engine = engine(engine::cpu, 0); - const Tensor& input_tensor = - MklGetInput(context, this->kInputTensorIndexInput); + const Tensor& input_tensor = MklGetInput(context, + this->kInputTensorIndexInput); MklDnnShape dnn_shape_input; GetMklShape(context, this->kInputTensorIndexInput, &dnn_shape_input); this->SanityCheckInput(context, input_tensor, dnn_shape_input); @@ -525,53 +526,82 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase { // initialize variables for the pooling op MklPoolParameters pool_params; // Get the input tensor and initialize the pooling parameters - this->ConfigureInput(context, dnn_shape_input, input_tensor, &pool_params, - &dnn_data_input); + TensorShape input_tensor_shape = input_tensor.shape(); + this->InitMklPoolParameters(context, &pool_params, + dnn_shape_input, input_tensor_shape); + // Get the input memory descriptor + memory::desc input_md = dnn_shape_input.IsMklTensor() + ? dnn_shape_input.GetMklLayout() + : memory::desc(TFShapeToMklDnnDimsInNCHW( + input_tensor_shape, + this->data_format_tf_), + MklDnnType(), + this->data_format_mkldnn_); OP_REQUIRES_OK(context, context->status()); + // Get src/filter/stride/padding information + memory::dims src_dims = dnn_shape_input.IsMklTensor() + ? 
dnn_shape_input.GetSizesAsMklDnnDims() + : TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), + this->data_format_tf_); + + memory::dims filter_dims = memory::dims({pool_params.window_rows, + pool_params.window_cols}); + memory::dims strides = memory::dims({pool_params.row_stride, + pool_params.col_stride}); + memory::dims padding_left = memory::dims( + {static_cast(pool_params.pad_top), + static_cast(pool_params.pad_left)}); + memory::dims padding_right = memory::dims( + {static_cast(pool_params.pad_bottom), + static_cast(pool_params.pad_right)}); + // Declare output tensor Tensor* output_tensor = nullptr; memory::dims output_dims_mkl_order; this->GetOutputDims(pool_params, &output_dims_mkl_order); - // If input is in Mkl layout, then just get the memory format from it - // directly, instead of using input data_format to MaxPool. - if (dnn_shape_input.IsMklTensor()) { - dnn_data_output.SetUsrMem( - output_dims_mkl_order, - static_cast( - dnn_data_input.GetUsrMemDesc().data.format)); - } else { - dnn_data_output.SetUsrMem(output_dims_mkl_order, - this->data_format_mkldnn_); - } + // Get a pooling op from the cached pool + MklPoolingFwdPrimitive *pooling_fwd = nullptr; + MklPoolingParams fwdParams(src_dims, output_dims_mkl_order, filter_dims, + strides, padding_left, padding_right, algorithm::pooling_max); + pooling_fwd = MklPoolingFwdPrimitiveFactory::Get(fwdParams); - // describe the memory layout; let mkl-dnn choose the best for the op - dnn_data_output.SetOpMemDesc(output_dims_mkl_order, memory::format::any); - - auto pool_desc = pooling_forward::desc( - prop_kind::forward, algorithm::pooling_max, - dnn_data_input.GetUsrMemDesc(), dnn_data_output.GetUsrMemDesc(), - memory::dims({pool_params.row_stride, pool_params.col_stride}), - memory::dims({pool_params.window_rows, pool_params.window_cols}), - memory::dims({static_cast(pool_params.pad_top), - static_cast(pool_params.pad_left)}), - memory::dims({static_cast(pool_params.pad_bottom), - 
static_cast(pool_params.pad_right)}), - TFPaddingToMklDnnPadding(this->padding_)); - auto pool_fwd_desc = - pooling_forward::primitive_desc(pool_desc, cpu_engine); - - this->AllocateOutputTensor(context, pool_fwd_desc, output_dims_mkl_order, - this->data_format_mkldnn_, &output_tensor); + // allocate output tensor + this->AllocateOutputTensor(context, *(pooling_fwd->GetPoolingFwdPd()), + output_dims_mkl_order, this->data_format_mkldnn_, &output_tensor); OP_REQUIRES_OK(context, context->status()); - dnn_data_output.SetUsrMemDataHandle(output_tensor); + dnn_data_output.SetUsrMem(output_dims_mkl_order, + pooling_fwd->GetDstMemoryFormat(), output_tensor); - AllocateWorkspaceTensor(context, pool_fwd_desc, &dnn_data_wksp); + AllocateWorkspaceTensor(context, *(pooling_fwd->GetPoolingFwdPd()), + &dnn_data_wksp); OP_REQUIRES_OK(context, context->status()); - this->PrepareAndExecuteNet(pool_fwd_desc, &dnn_data_input, - &dnn_data_output, &dnn_data_wksp); + // check wehther we need to reorder src + std::vector net; + T* src_data = nullptr; + if (input_md.data.format != pooling_fwd->GetSrcMemoryFormat()) { + dnn_data_input.SetUsrMem(input_md, &input_tensor); + auto src_target_primitive_desc = memory::primitive_desc( + {{src_dims}, MklDnnType(), pooling_fwd->GetSrcMemoryFormat()}, + cpu_engine); + dnn_data_input.CheckReorderToOpMem(src_target_primitive_desc, &net); + src_data = static_cast( + dnn_data_input.GetOpMem().get_data_handle()); + } else { + src_data = static_cast(const_cast( + input_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); + + T* dst_data = static_cast( + const_cast(output_tensor->flat().data())); + T* ws_data = static_cast( + dnn_data_wksp.GetOpMem().get_data_handle()); + + // execute pooling op + pooling_fwd->Execute(src_data, dst_data, ws_data); } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + ", message: " + string(e.message) + ", in file " + @@ -579,30 +609,30 @@ class MklMaxPoolingOp : 
public MklPoolingForwardOpBase { OP_REQUIRES_OK(context, errors::Aborted("Compute received an exception:", error_msg)); } - } // Compute + } private: - const int kOutputTensorIndexWorkspace = 1; - - void AllocateWorkspaceTensor( - OpKernelContext* context, - const pooling_forward::primitive_desc& pool_fwd_prim_desc, - MklDnnData* dnn_data_wksp) { - CHECK_NOTNULL(dnn_data_wksp); - Tensor* workspace_tensor = nullptr; - memory::primitive_desc workspace_pd = - pool_fwd_prim_desc.workspace_primitive_desc(); - size_t workspace_bytes = workspace_pd.get_size(); - MklDnnShape workspace_mkl_shape; - workspace_mkl_shape.SetMklTensor(false); - TensorShape workspace_tf_shape; - workspace_tf_shape.AddDim(workspace_bytes); - AllocateOutputSetMklShape(context, kOutputTensorIndexWorkspace, - &workspace_tensor, workspace_tf_shape, - workspace_mkl_shape); - CHECK_NOTNULL(workspace_tensor); - dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor); - } + const int kOutputTensorIndexWorkspace = 1; + engine cpu_engine = engine(engine::cpu, 0); + + void AllocateWorkspaceTensor(OpKernelContext* context, + const pooling_forward::primitive_desc& pool_fwd_prim_desc, + MklDnnData* dnn_data_wksp) { + CHECK_NOTNULL(dnn_data_wksp); + Tensor* workspace_tensor = nullptr; + memory::primitive_desc workspace_pd + = pool_fwd_prim_desc.workspace_primitive_desc(); + size_t workspace_bytes = workspace_pd.get_size(); + MklDnnShape workspace_mkl_shape; + workspace_mkl_shape.SetMklTensor(false); + TensorShape workspace_tf_shape; + workspace_tf_shape.AddDim(workspace_bytes); + AllocateOutputSetMklShape(context, kOutputTensorIndexWorkspace, + &workspace_tensor, + workspace_tf_shape, workspace_mkl_shape); + CHECK_NOTNULL(workspace_tensor); + dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor); + } }; // The operation to compute MaxPool gradients. 
@@ -616,98 +646,112 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { public: explicit MklMaxPoolingGradOp(OpKernelConstruction* context) : MklPoolingBackwardOpBase(context) {} - void Compute(OpKernelContext* context) override { try { auto cpu_engine = engine(engine::cpu, 0); const Tensor& orig_input_tensor = MklGetInput(context, kInputTensorIndexOrigInput); - const Tensor& orig_output_tensor = - MklGetInput(context, kInputTensorIndexOrigOutput); const Tensor& grad_tensor = MklGetInput(context, kInputTensorIndexGradient); const Tensor& workspace_tensor = MklGetInput(context, kInputTensorIndexWorkspace); - MklDnnShape orig_input_mkl_shape, orig_output_mkl_shape, grad_mkl_shape, - workspace_mkl_shape; + MklDnnShape orig_input_mkl_shape, grad_mkl_shape; GetMklShape(context, kInputTensorIndexOrigInput, &orig_input_mkl_shape); - GetMklShape(context, kInputTensorIndexOrigOutput, &orig_output_mkl_shape); GetMklShape(context, kInputTensorIndexGradient, &grad_mkl_shape); - GetMklShape(context, kInputTensorIndexWorkspace, &workspace_mkl_shape); - - SanityCheckInputs(context, orig_input_tensor, orig_output_tensor, - grad_tensor, workspace_tensor, orig_input_mkl_shape, - orig_output_mkl_shape, grad_mkl_shape, - workspace_mkl_shape); if (!context->status().ok()) return; MklDnnData grad_dnn_data(&cpu_engine); MklDnnData workspace_dnn_data(&cpu_engine); - MklDnnData output_dnn_data(&cpu_engine); - Tensor* output_tensor = nullptr; + MklPoolParameters pool_params; - TensorShape orig_input_shape; - memory::dims output_dims_mkl_order, orig_input_dims_mkl_order; - memory::desc original_input_md = ConfigureOriginalInput( - context, orig_input_tensor, orig_input_mkl_shape, - &orig_input_dims_mkl_order, &pool_params, &orig_input_shape); - - memory::desc original_output_md = this->ConfigureOriginalOutput( - pool_params, orig_output_mkl_shape, output_dims_mkl_order); - - memory::desc target_diff_dst_md = this->ConfigureInputGradient( - grad_mkl_shape, grad_tensor, 
&grad_dnn_data, original_output_md); - - output_dnn_data.SetUsrMem(original_input_md); - - // Create the forward pooling primitive descriptor so we can - // pass it as a hint to the backward pooling primitive descriptor - auto pool_fwd_desc = pooling_forward::desc( - prop_kind::forward, algorithm::pooling_max, original_input_md, - original_output_md, - memory::dims({pool_params.row_stride, pool_params.col_stride}), - memory::dims({pool_params.window_rows, pool_params.window_cols}), - memory::dims({static_cast(pool_params.pad_top), - static_cast(pool_params.pad_left)}), - memory::dims({static_cast(pool_params.pad_bottom), - static_cast(pool_params.pad_right)}), - TFPaddingToMklDnnPadding(this->padding_)); - auto pool_fwd_prim_desc = - pooling_forward::primitive_desc(pool_fwd_desc, cpu_engine); - - auto pool_bkwd_desc = pooling_backward::desc( - algorithm::pooling_max, output_dnn_data.GetUsrMemDesc(), - target_diff_dst_md, - memory::dims({pool_params.row_stride, pool_params.col_stride}), - memory::dims({pool_params.window_rows, pool_params.window_cols}), - memory::dims({static_cast(pool_params.pad_top), - static_cast(pool_params.pad_left)}), - memory::dims({static_cast(pool_params.pad_bottom), - static_cast(pool_params.pad_right)}), - TFPaddingToMklDnnPadding(this->padding_)); - auto pool_bkwd_prim_desc = pooling_backward::primitive_desc( - pool_bkwd_desc, cpu_engine, pool_fwd_prim_desc); - - this->AllocateOutputTensor(context, pool_bkwd_prim_desc, + TensorShape orig_input_shape = orig_input_tensor.shape(); + this->InitMklPoolParameters(context, &pool_params, + orig_input_mkl_shape, orig_input_shape); + + memory::dims filter_dims = memory::dims( + {pool_params.window_rows, pool_params.window_cols}); + memory::dims strides = memory::dims( + {pool_params.row_stride, pool_params.col_stride}); + memory::dims padding_left = memory::dims( + {static_cast(pool_params.pad_top), + static_cast(pool_params.pad_left)}); + memory::dims padding_right = memory::dims( + 
{static_cast(pool_params.pad_bottom), + static_cast(pool_params.pad_right)}); + memory::dims diff_dst_dims = grad_mkl_shape.IsMklTensor() + ? grad_mkl_shape.GetSizesAsMklDnnDims() + : TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), + this->data_format_tf_); + memory::dims orig_input_dims_mkl_order = + orig_input_mkl_shape.IsMklTensor() + ? orig_input_mkl_shape.GetSizesAsMklDnnDims() + : TFShapeToMklDnnDimsInNCHW(orig_input_shape, + this->data_format_tf_); + + memory::dims output_dims_mkl_order; + this->GetOutputDims(pool_params, &output_dims_mkl_order); + + MklPoolingParams bwdParams(orig_input_dims_mkl_order, + output_dims_mkl_order, filter_dims, strides, + padding_left, padding_right, algorithm::pooling_max); + MklPoolingBwdPrimitive *pooling_bwd = + MklPoolingBwdPrimitiveFactory::Get(bwdParams); + + // allocate output tensor and memory primitive + Tensor* output_tensor = nullptr; + this->AllocateOutputTensor(context, *(pooling_bwd->GetPoolingBwdPd()), orig_input_dims_mkl_order, this->data_format_mkldnn_, &output_tensor); - output_dnn_data.SetUsrMemDataHandle(output_tensor); - - ConfigureWorkspace(workspace_tensor, - pool_fwd_prim_desc.workspace_primitive_desc(), - &workspace_dnn_data); - this->PrepareAndExecuteNet( - pool_bkwd_prim_desc, &grad_dnn_data, &output_dnn_data, - memory::primitive_desc(target_diff_dst_md, cpu_engine), - &workspace_dnn_data); - } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK(context, errors::Aborted("Compute received an exception:", - error_msg)); + // get diff_dst mem desc + memory::desc diff_dst_md = grad_mkl_shape.IsMklTensor() + ? 
grad_mkl_shape.GetMklLayout() + : memory::desc(diff_dst_dims, MklDnnType(), + this->data_format_mkldnn_); + // check if diff_dst needs to be reordered + T* diff_dst_data = nullptr; + std::vector net; + if (diff_dst_md.data.format != pooling_bwd->GetDiffDstFormat()) { + auto target_diff_dst = memory::primitive_desc({{diff_dst_dims}, + MklDnnType(), pooling_bwd->GetDiffDstFormat()}, cpu_engine); + grad_dnn_data.SetUsrMem(diff_dst_md, &grad_tensor); + grad_dnn_data.CheckReorderToOpMem(target_diff_dst, &net); + diff_dst_data = static_cast( + grad_dnn_data.GetOpMem().get_data_handle()); + } else { + diff_dst_data = static_cast( + const_cast(grad_tensor.flat().data())); + } + void* ws_data = nullptr; + auto ws_md = + pooling_bwd->GetPoolingFwdPd()->workspace_primitive_desc().desc(); + if (ws_md.data.format != pooling_bwd->GetWorkspaceFormat()) { + memory::dims ws_dims; + ws_dims.assign(ws_md.data.dims, ws_md.data.dims + ws_md.data.ndims); + auto target_ws = memory::primitive_desc({{ws_dims}, + pooling_bwd->GetWorkspaceDataType(), + pooling_bwd->GetWorkspaceFormat()}, cpu_engine); + workspace_dnn_data.SetUsrMem(ws_md, &workspace_tensor); + workspace_dnn_data.CheckReorderToOpMem(target_ws, &net); + ws_data = workspace_dnn_data.GetOpMem().get_data_handle(); + } else { + ws_data = static_cast(const_cast( + workspace_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); + + T* diff_src_data = static_cast( + const_cast(output_tensor->flat().data())); + + // execute pooling + pooling_bwd->Execute(diff_dst_data, diff_src_data, ws_data); + } catch (mkldnn::error &e) { + string error_msg = "Status:" + std::to_string(e.status) + + ", message: " + string(e.message) + ". 
in file " + + string(__FILE__) + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK(context, errors::Aborted( + "Compute received an exception:", error_msg)); } - } // Compute + } private: // .Input("orig_input: T") diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc index 5ef6ce2a57..df58a1edec 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc @@ -24,12 +24,195 @@ limitations under the License. namespace tensorflow { +#ifndef INTEL_MKL_ML + +using mkldnn::pooling_max; +using mkldnn::pooling_avg; +using mkldnn::pooling_avg_include_padding; +using mkldnn::pooling_avg_exclude_padding; +using mkldnn::prop_kind; + +template +void MklPoolingFwdPrimitive::Setup(const MklPoolingParams& fwdParams) { + if (fwdParams.alg_kind != pooling_max && + fwdParams.alg_kind != pooling_avg && + fwdParams.alg_kind != pooling_avg_include_padding && + fwdParams.alg_kind != pooling_avg_exclude_padding) { + assert("Pooling algorithm kind is not supported\n"); + } + + context_.alg_kind = fwdParams.alg_kind; + // create memory desc + // FIXME: Pooling doesn't expose to get the src_primitive_desc, + // so src format is currently hard-coded. 
+ // A utility function is used to do this, + // which may be broken with future CPU architectures + context_.src_md.reset(new memory::desc({fwdParams.src_dims}, + MklDnnType(), get_desired_format(fwdParams.src_dims[1]))); + context_.dst_md.reset(new memory::desc({fwdParams.dst_dims}, + MklDnnType(), memory::format::any)); + + // create a pooling descriptor + context_.fwd_desc.reset(new pooling_forward::desc(prop_kind::forward_training, + fwdParams.alg_kind, *context_.src_md, *context_.dst_md, fwdParams.strides, + fwdParams.filter_dims, fwdParams.padding_left, + fwdParams.padding_right, padding_kind::zero)); + context_.fwd_pd.reset( + new pooling_forward::primitive_desc(*context_.fwd_desc, cpu_engine_)); + + // store expected primitive format + context_.src_fmt = get_desired_format(fwdParams.src_dims[1]); + context_.dst_fmt = static_cast( + context_.fwd_pd.get()->dst_primitive_desc().desc().data.format); + + // create MKL-DNN internal memory object with dummy data + context_.src_mem.reset( + new memory({{{fwdParams.src_dims}, MklDnnType(), context_.src_fmt}, + cpu_engine_}, DummyData)); + context_.dst_mem.reset( + new memory(context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); + + // for max pooling, need to return workspace(ws) for backward computing + if (fwdParams.alg_kind == pooling_max) { + auto ws_pd = context_.fwd_pd.get()->workspace_primitive_desc().desc().data; + // store workspace's dims and format to create workspace tensor + context_.ws_fmt = static_cast(ws_pd.format); + context_.ws_dims.assign(ws_pd.dims, ws_pd.dims + ws_pd.ndims); + context_.ws_dt = static_cast(ws_pd.data_type); + context_.ws_size = + context_.fwd_pd.get()->workspace_primitive_desc().get_size(); + context_.ws_mem.reset( + new memory(context_.fwd_pd.get()->workspace_primitive_desc(), + DummyData)); + context_.fwd.reset(new pooling_forward(*context_.fwd_pd, + *context_.src_mem, *context_.dst_mem, *context_.ws_mem)); + } else { + context_.fwd.reset(new 
pooling_forward(*context_.fwd_pd, + *context_.src_mem, *context_.dst_mem)); + } + + context_.fwd_primitives.push_back(*context_.fwd); + return; +} + +template +void MklPoolingFwdPrimitive::Execute(const T* src_data, const T* dst_data, + const void* ws_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + if (context_.alg_kind == pooling_max) { // max pooling must have ws + assert(ws != nullptr); + context_.ws_mem->set_data_handle(const_cast(ws_data)); + } + context_.fwd_stream->submit(context_.fwd_primitives); + + // set back data handle + context_.src_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); + if (context_.alg_kind == pooling_max) { // max pooling must have ws + assert(ws != nullptr); + context_.ws_mem->set_data_handle(DummyData); + } + return; +} + +template class MklPoolingFwdPrimitive; + +template +void MklPoolingBwdPrimitive::Setup(const MklPoolingParams& bwdParams) { + if (bwdParams.alg_kind != pooling_max && bwdParams.alg_kind != pooling_avg + && bwdParams.alg_kind != pooling_avg_include_padding + && bwdParams.alg_kind != pooling_avg_exclude_padding) { + assert("Pooling algorithm kind is not supported\n"); + } + context_.alg_kind = bwdParams.alg_kind; + + // Create memory desc + context_.diff_src_md.reset(new memory::desc({bwdParams.src_dims}, + MklDnnType(), memory::format::any)); + context_.diff_dst_md.reset(new memory::desc({bwdParams.dst_dims}, + MklDnnType(), get_desired_format(bwdParams.dst_dims[1]))); + context_.bwd_desc.reset(new pooling_backward::desc(bwdParams.alg_kind, + *context_.diff_src_md, *context_.diff_dst_md, bwdParams.strides, + bwdParams.filter_dims, bwdParams.padding_left, bwdParams.padding_right, + padding_kind::zero)); + + // create a forward primitive, + // which will be used as a hint for creating backward primitive + context_.fwd_desc.reset(new 
pooling_forward::desc(prop_kind::forward_training, + bwdParams.alg_kind, *context_.diff_src_md, *context_.diff_dst_md, + bwdParams.strides, bwdParams.filter_dims, bwdParams.padding_left, + bwdParams.padding_right, padding_kind::zero)); + context_.fwd_pd.reset( + new pooling_forward::primitive_desc(*context_.fwd_desc, cpu_engine)); + context_.bwd_pd.reset(new pooling_backward::primitive_desc( + *context_.bwd_desc, cpu_engine, *context_.fwd_pd)); + + // store expected primitive format + context_.diff_src_fmt = static_cast( + context_.bwd_pd.get()->diff_src_primitive_desc().desc().data.format); + context_.diff_dst_fmt = get_desired_format(bwdParams.dst_dims[1]); + + // create MKL-DNN internal memory object with dummy data + context_.diff_src_mem.reset( + new memory(context_.bwd_pd.get()->diff_src_primitive_desc(), DummyData)); + context_.diff_dst_mem.reset(new memory({{{bwdParams.dst_dims}, + MklDnnType(), context_.diff_dst_fmt}, cpu_engine}, DummyData)); + + // for max pooling, need to return workspace for backward + if (bwdParams.alg_kind == pooling_max) { + auto ws_pd = context_.fwd_pd.get()->workspace_primitive_desc().desc().data; + context_.ws_dims.assign(ws_pd.dims, ws_pd.dims + ws_pd.ndims); + context_.ws_fmt = get_desired_format(context_.ws_dims[1]); + context_.ws_dt = static_cast(ws_pd.data_type); + context_.ws_mem.reset(new memory({{{context_.ws_dims}, context_.ws_dt, + context_.ws_fmt}, cpu_engine}, DummyData)); + context_.bwd.reset(new pooling_backward( + *context_.bwd_pd, *context_.diff_dst_mem, *context_.ws_mem, + *context_.diff_src_mem)); + } else { + context_.bwd.reset(new pooling_backward(*context_.bwd_pd, + *context_.diff_dst_mem, *context_.diff_src_mem)); + } + context_.bwd_primitives.push_back(*context_.bwd); +} + +template +void MklPoolingBwdPrimitive::Execute(const T* diff_dst_data, + const T* diff_src_data, const void* ws_data) { + context_.diff_dst_mem->set_data_handle( + static_cast(const_cast(diff_dst_data))); + 
context_.diff_src_mem->set_data_handle( + static_cast(const_cast(diff_src_data))); + if (context_.alg_kind == pooling_max) { + assert(ws_data != nullptr); + context_.ws_mem->set_data_handle(const_cast(ws_data)); + } + + context_.bwd_stream->submit(context_.bwd_primitives); + // set back data handle + context_.diff_dst_mem->set_data_handle(DummyData); + context_.diff_src_mem->set_data_handle(DummyData); + if (context_.alg_kind == pooling_max) { + assert(ws_data != nullptr); + context_.ws_mem->set_data_handle(DummyData); + } + return; +} + +template class MklPoolingBwdPrimitive; + +#endif + // Initialization for TensorFlow format -void MklPoolParameters::Init(OpKernelContext* context, - const std::vector& ksize, - const std::vector& stride, Padding padding, - TensorFormat data_format, - const TensorShape& tensor_in_shape) { +void MklPoolParameters::Init( + OpKernelContext* context, + const std::vector& ksize, + const std::vector& stride, Padding padding, + TensorFormat data_format, + const TensorShape& tensor_in_shape) { // For maxpooling, tensor_in should have 4 dimensions. OP_REQUIRES(context, tensor_in_shape.dims() == 4, errors::InvalidArgument("tensor_in must be 4-dimensional")); diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index 279167aba2..468dc41c57 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -19,6 +19,7 @@ limitations under the License. 
#ifdef INTEL_MKL #include #include +#include #include "tensorflow/core/util/mkl_util.h" #include "tensorflow/core/util/padding.h" @@ -32,6 +33,315 @@ using mkldnn::stream; namespace tensorflow { +#ifndef INTEL_MKL_ML + +using mkldnn::memory; +using mkldnn::pooling_max; +using mkldnn::pooling_avg; +using mkldnn::pooling_avg_include_padding; +using mkldnn::pooling_avg_exclude_padding; +using mkldnn::prop_kind; + +struct MklPoolingParams { + memory::dims src_dims; + memory::dims dst_dims; + memory::dims filter_dims; + memory::dims strides; + memory::dims padding_left; + memory::dims padding_right; + mkldnn::algorithm alg_kind; + + MklPoolingParams(memory::dims src_dims, + memory::dims dst_dims, memory::dims filter_dims, + memory::dims strides, memory::dims padding_left, + memory::dims padding_right, mkldnn::algorithm alg_kind) : + src_dims(src_dims), dst_dims(dst_dims), + filter_dims(filter_dims), strides(strides), + padding_left(padding_left), padding_right(padding_right), + alg_kind(alg_kind) { + } +}; + +template +class MklPoolingFwdPrimitive : public MklPrimitive { + public: + explicit MklPoolingFwdPrimitive(const MklPoolingParams& fwdParams) { + context_.fwd_stream.reset(new stream(stream::kind::eager)); + if (context_.fwd == nullptr) + Setup(fwdParams); + } + + ~MklPoolingFwdPrimitive() {} + + // Pooling forward execute + // src_data: input data buffer of src + // ws_data: input data buffer of workspace + // dst_data: output data buffer of dst + void Execute(const T* src_data, const T* dst_data, + const void* ws_data = nullptr); + + std::shared_ptr + GetPoolingFwdPd() const { + return context_.fwd_pd; + } + + memory::format GetSrcMemoryFormat() const { + return context_.src_fmt; + } + + memory::format GetDstMemoryFormat() const { + return context_.dst_fmt; + } + + private: + void Setup(const MklPoolingParams& fwdParams); + + + struct PoolingFwdContext { + // algorithm + mkldnn::algorithm alg_kind; + + // expected memory format + memory::format src_fmt; + 
memory::format dst_fmt; + memory::format ws_fmt; + + // workspace shape + memory::dims ws_dims; + memory::data_type ws_dt; + size_t ws_size; + + // MKL-DNN memory, just dummy data + std::shared_ptr ws_mem; + std::shared_ptr src_mem; + std::shared_ptr dst_mem; + + // desc & primitive desc + std::shared_ptr fwd_desc; + std::shared_ptr fwd_pd; + + // memory desc + std::shared_ptr src_md; + std::shared_ptr dst_md; + + // Pooling primitive + std::shared_ptr fwd; + std::shared_ptr fwd_stream; + std::vector fwd_primitives; + + PoolingFwdContext() : + src_fmt(memory::format::any), dst_fmt(memory::format::any), + ws_fmt(memory::format::any), ws_mem(nullptr), src_mem(nullptr), + dst_mem(nullptr), fwd_desc(nullptr), fwd_pd(nullptr), src_md(nullptr), + dst_md(nullptr), fwd(nullptr), fwd_stream(nullptr) { + } + } context_; + + engine cpu_engine_ = engine(engine::cpu, 0); +}; + +template +class MklPoolingFwdPrimitiveFactory : public MklPrimitiveFactory { + public: + static MklPoolingFwdPrimitive* Get(const MklPoolingParams& fwdParams) { + MklPoolingFwdPrimitive* pooling_forward = nullptr; + + // Get pooling primitive from the pool + pooling_forward = static_cast*>( + MklPoolingFwdPrimitiveFactory::GetInstance().GetPoolingFwd(fwdParams)); + + if (pooling_forward == nullptr) { + pooling_forward = new MklPoolingFwdPrimitive(fwdParams); + MklPoolingFwdPrimitiveFactory::GetInstance().SetPoolingFwd( + fwdParams, pooling_forward); + } + return pooling_forward; + } + + static MklPoolingFwdPrimitiveFactory& GetInstance() { + static MklPoolingFwdPrimitiveFactory instance_; + return instance_; + } + + private: + MklPoolingFwdPrimitiveFactory() {} + ~MklPoolingFwdPrimitiveFactory() {} + + // The key to be created will be used to get/set pooling + // primitive op from reuse perspective. + // A pooling key is a string which concates key parameters + // as well as algorithm kind (max versus avg). 
+ static std::string CreateKey(const MklPoolingParams& fwdParams) { + std::string prefix = "pooling_fwd"; + FactoryKeyCreator key_creator; + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(fwdParams.src_dims); + key_creator.AddAsKey(fwdParams.dst_dims); + key_creator.AddAsKey(fwdParams.filter_dims); + key_creator.AddAsKey(fwdParams.strides); + key_creator.AddAsKey(fwdParams.padding_left); + key_creator.AddAsKey(fwdParams.padding_right); + key_creator.AddAsKey(static_cast(fwdParams.alg_kind)); + return key_creator.GetKey(); + } + + MklPrimitive* GetPoolingFwd(const MklPoolingParams& fwdParams) { + std::string key = CreateKey(fwdParams); + return this->GetOp(key); + } + + void SetPoolingFwd(const MklPoolingParams& fwdParams, MklPrimitive *op) { + std::string key = CreateKey(fwdParams); + this->SetOp(key, op); + } +}; + + +template +class MklPoolingBwdPrimitive : public MklPrimitive { + public: + explicit MklPoolingBwdPrimitive(const MklPoolingParams& bwdParams) { + context_.bwd_stream.reset(new stream(stream::kind::eager)); + if (context_.bwd == nullptr) + Setup(bwdParams); + } + + ~MklPoolingBwdPrimitive() {} + + // Pooling backward execute + // diff_dst_data: input data buffer of diff_dst + // diff_src_data: output data buffer of diff_src + // ws_data: input data buffer of workspace + void Execute(const T* diff_dst_data, const T* diff_src_data, + const void* ws_data = nullptr); + + public: + std::shared_ptr + GetPoolingFwdPd() const { + return context_.fwd_pd; + } + std::shared_ptr + GetPoolingBwdPd() const { + return context_.bwd_pd; + } + + memory::format GetDiffDstFormat() const { + return context_.diff_dst_fmt; + } + + mkldnn::memory::data_type GetWorkspaceDataType() const { + return context_.ws_dt; + } + memory::format GetWorkspaceFormat() const { + return context_.ws_fmt; + } + + private: + void Setup(const MklPoolingParams& bwdParams); + // Primitive reuse context for pooling bwd ops + struct PoolingBwdContext { + // algorithm + mkldnn::algorithm 
alg_kind; + + // expected memory format + mkldnn::memory::format diff_src_fmt; + mkldnn::memory::format diff_dst_fmt; + mkldnn::memory::format ws_fmt; + + // workspace attribute + mkldnn::memory::dims ws_dims; + mkldnn::memory::data_type ws_dt; + + // MKL-DNN memory + std::shared_ptr ws_mem; + std::shared_ptr diff_src_mem; + std::shared_ptr diff_dst_mem; + + // memory desc + std::shared_ptr diff_src_md; + std::shared_ptr diff_dst_md; + + // desc & primitive desc + std::shared_ptr fwd_desc; + std::shared_ptr bwd_desc; + std::shared_ptr fwd_pd; + std::shared_ptr bwd_pd; + + // pooling primitive + std::shared_ptr bwd; + std::shared_ptr bwd_stream; + + std::vector bwd_primitives; + + PoolingBwdContext() : + diff_src_fmt(memory::format::any), diff_dst_fmt(memory::format::any), + ws_fmt(memory::format::any), ws_mem(nullptr), diff_src_mem(nullptr), + diff_dst_mem(nullptr), diff_src_md(nullptr), diff_dst_md(nullptr), + fwd_desc(nullptr), bwd_desc(nullptr), fwd_pd(nullptr), bwd_pd(nullptr), + bwd(nullptr), bwd_stream(nullptr) { + } + } context_; + // cpu engine + engine cpu_engine = engine(engine::cpu, 0); +}; + +template +class MklPoolingBwdPrimitiveFactory : public MklPrimitiveFactory { + public: + static MklPoolingBwdPrimitive *Get(const MklPoolingParams& bwdParams) { + MklPoolingBwdPrimitive* pooling_backward = nullptr; + + // Find a pooling backward primitive from the pool + // If it does not exist, create a new one + pooling_backward = static_cast*>( + MklPoolingBwdPrimitiveFactory::GetInstance().GetPoolingBwd(bwdParams)); + if (pooling_backward == nullptr) { + pooling_backward = new MklPoolingBwdPrimitive(bwdParams); + MklPoolingBwdPrimitiveFactory::GetInstance().SetPoolingBwd( + bwdParams, pooling_backward); + } + return pooling_backward; + } + + static MklPoolingBwdPrimitiveFactory& GetInstance() { + static MklPoolingBwdPrimitiveFactory instance_; + return instance_; + } + + private: + MklPoolingBwdPrimitiveFactory() {} + ~MklPoolingBwdPrimitiveFactory() {} + + // 
The key to be created will be used to get/set pooling + // primitive op from reuse perspective. + // A pooling key is a string which concates key parameters + // as well as algorithm kind (max versus avg). + static std::string CreateKey(const MklPoolingParams& bwdParams) { + std::string prefix = "pooling_bwd"; + FactoryKeyCreator key_creator; + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(bwdParams.src_dims); + key_creator.AddAsKey(bwdParams.dst_dims); + key_creator.AddAsKey(bwdParams.filter_dims); + key_creator.AddAsKey(bwdParams.strides); + key_creator.AddAsKey(bwdParams.padding_left); + key_creator.AddAsKey(bwdParams.padding_right); + key_creator.AddAsKey(static_cast(bwdParams.alg_kind)); + return key_creator.GetKey(); + } + + MklPrimitive* GetPoolingBwd(const MklPoolingParams& bwdParams) { + std::string key = CreateKey(bwdParams); + return this->GetOp(key); + } + + void SetPoolingBwd(const MklPoolingParams& bwdParams, MklPrimitive *op) { + std::string key = CreateKey(bwdParams); + this->SetOp(key, op); + } +}; +#endif + typedef Eigen::ThreadPoolDevice CPUDevice; struct MklPoolParameters { @@ -351,7 +661,7 @@ class MklPoolingBackwardOpBase : public MklPoolingOpBase { memory::desc ConfigureOriginalOutput( const MklPoolParameters& pool_params, const MklDnnShape& original_output_mkl_shape, - memory::dims output_dims_mkl_order) { + const memory::dims& output_dims_mkl_order) { this->GetOutputDims(pool_params, &output_dims_mkl_order); return original_output_mkl_shape.IsMklTensor() diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 230b4278ca..dde71b4116 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -36,7 +36,7 @@ limitations under the License. 
#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" - +#include "tensorflow/core/platform/cpu_info.h" #ifndef INTEL_MKL_ML #include "mkldnn.hpp" @@ -1482,7 +1482,8 @@ class MklDnnData { /// Operations memory descriptor memory::desc* op_md_; - + /// Operations temp buffer + void* allocated_buffer_; /// CPU engine on which operation will be executed const engine* cpu_engine_; @@ -1491,6 +1492,7 @@ class MklDnnData { : user_memory_(nullptr), reorder_memory_(nullptr), op_md_(nullptr), + allocated_buffer_(nullptr), cpu_engine_(e) {} ~MklDnnData() { @@ -1631,6 +1633,14 @@ class MklDnnData { user_memory_->set_data_handle(GetTensorBuffer(tensor)); } + /// allocate function for data buffer + inline void AllocateBuffer(size_t size) { + allocated_buffer_ = cpu_allocator()->AllocateRaw(64, size); + } + inline void* GetAllocatedBuffer() { + return allocated_buffer_; + } + /// Get the memory primitive for input and output of an op. If inputs /// to an op require reorders, then this function returns memory primitive /// for reorder. Otherwise, it will return memory primitive for user memory. @@ -1794,11 +1804,11 @@ class MklDnnData { } }; -/// Base class for operations with reuse of DNN primitives +/// Base class for operations with reuse of primitives /// -class DnnOp { +class MklPrimitive { public: - virtual ~DnnOp() {} + virtual ~MklPrimitive() {} // Dummy data. Its size, hard-coded as 256 here, does // not matter since MKL should never operate on this buffer. 
@@ -1808,31 +1818,31 @@ class DnnOp { const mkldnn::memory::dims NONE_DIMS = {}; // This constant is used to declare dummy buffer (size), for MKL primitives template -class DnnOpFactory { +class MklPrimitiveFactory { public: - DnnOpFactory() {} - ~DnnOpFactory() {} + MklPrimitiveFactory() {} + ~MklPrimitiveFactory() {} - DnnOp* GetOp(const std::string& key) { - auto stream_iter = DnnOpFactory::GetHashMap().find(key); - if (stream_iter == DnnOpFactory::GetHashMap().end()) { + MklPrimitive* GetOp(const std::string& key) { + auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); + if (stream_iter == MklPrimitiveFactory::GetHashMap().end()) { return nullptr; } else { return stream_iter->second; } } - void SetOp(const std::string& key, DnnOp* op) { - auto stream_iter = DnnOpFactory::GetHashMap().find(key); + void SetOp(const std::string& key, MklPrimitive* op) { + auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); - CHECK(stream_iter == DnnOpFactory::GetHashMap().end()); + CHECK(stream_iter == MklPrimitiveFactory::GetHashMap().end()); - DnnOpFactory::GetHashMap()[key] = op; + MklPrimitiveFactory::GetHashMap()[key] = op; } private: - static inline std::unordered_map &GetHashMap() { - static thread_local std::unordered_map map_; + static inline std::unordered_map &GetHashMap() { + static thread_local std::unordered_map map_; return map_; } }; @@ -1877,6 +1887,21 @@ class FactoryKeyCreator { } }; +static inline memory::format get_desired_format(int channel) { + memory::format fmt_desired = memory::format::any; + + if (port::TestCPUFeature(port::CPUFeature::AVX512F) && + (channel % 16) == 0) { + fmt_desired = memory::format::nChw16c; + } else if (port::TestCPUFeature(port::CPUFeature::AVX2) && + (channel % 8) == 0) { + fmt_desired = memory::format::nChw8c; + } else { + fmt_desired = memory::format::nchw; + } + return fmt_desired; +} + #endif // INTEL_MKL_DNN } // namespace tensorflow -- GitLab From d89e88ba872f901fb9134385bfb782fc408f51d2 Mon Sep 17 
00:00:00 2001 From: Guozhong Zhuang Date: Mon, 21 May 2018 13:38:36 -0700 Subject: [PATCH 0019/2038] revert mkl_conv_ops.cc to avoid PR review confusion --- tensorflow/core/kernels/mkl_conv_ops.cc | 280 ++++++++++-------------- 1 file changed, 116 insertions(+), 164 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index c032add82e..f2b14f1278 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -59,8 +59,7 @@ namespace tensorflow { #ifndef INTEL_MKL_ML -// This structure aggregates multiple inputs to Conv2DFwd* methods. -struct MklConvFwdParams { +struct ConvFwdDimensions { memory::dims src_dims; memory::dims filter_dims; memory::dims bias_dims; @@ -70,7 +69,7 @@ struct MklConvFwdParams { memory::dims padding_left; memory::dims padding_right; - MklConvFwdParams(memory::dims src_dims, + ConvFwdDimensions(memory::dims src_dims, memory::dims filter_dims, memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, @@ -83,40 +82,35 @@ struct MklConvFwdParams { }; template -class MklConv2DFwdPrimitive: public MklPrimitive { +class Conv2DFwd : public DnnOp { public: - explicit MklConv2DFwdPrimitive(const MklConvFwdParams& convFwdDims) { - context_.fwd_stream.reset(new stream(stream::kind::eager)); + explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) { + fwd_stream_.reset(new stream(stream::kind::eager)); // create conv primitive - if (context_.conv_fwd == nullptr) { + if (conv_fwd_ == nullptr) { Setup(convFwdDims); } } - ~MklConv2DFwdPrimitive() {} + ~Conv2DFwd() {} // Convolution forward execute with bias // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // bias_data: input data buffer of bias // dst_data: output data buffer of dst - void Execute(const T* src_data, const T* filter_data, - const T* bias_data, const T* dst_data) { - 
context_.src_mem->set_data_handle( - static_cast(const_cast(src_data))); - context_.filter_mem->set_data_handle( - static_cast(const_cast(filter_data))); - context_.bias_mem->set_data_handle( - static_cast(const_cast(bias_data))); - context_.dst_mem->set_data_handle( - static_cast(const_cast(dst_data))); - context_.fwd_stream->submit(context_.fwd_primitives); + void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) { + src_mem_->set_data_handle(static_cast(src_data)); + filter_mem_->set_data_handle(static_cast(filter_data)); + bias_mem_->set_data_handle(static_cast(bias_data)); + dst_mem_->set_data_handle(static_cast(dst_data)); + fwd_stream_->submit(fwd_primitives_); // after exec, set data handle back - context_.src_mem->set_data_handle(DummyData); - context_.filter_mem->set_data_handle(DummyData); - context_.bias_mem->set_data_handle(DummyData); - context_.dst_mem->set_data_handle(DummyData); + src_mem_->set_data_handle(DummyData); + filter_mem_->set_data_handle(DummyData); + bias_mem_->set_data_handle(DummyData); + dst_mem_->set_data_handle(DummyData); return; } @@ -125,174 +119,139 @@ class MklConv2DFwdPrimitive: public MklPrimitive { // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // dst_data: output data buffer of dst - void Execute(const T* src_data, const T* filter_data, - const T* dst_data) { - context_.src_mem->set_data_handle( - static_cast(const_cast(src_data))); - context_.filter_mem->set_data_handle( - static_cast(const_cast(filter_data))); - context_.dst_mem->set_data_handle( - static_cast(const_cast(dst_data))); - context_.fwd_stream->submit(context_.fwd_primitives); - - // after execution, set data handle back - context_.src_mem->set_data_handle(DummyData); - context_.filter_mem->set_data_handle(DummyData); - context_.dst_mem->set_data_handle(DummyData); + void Execute(T* src_data, T* filter_data, T* dst_data) { + src_mem_->set_data_handle(static_cast(src_data)); + 
filter_mem_->set_data_handle(static_cast(filter_data)); + dst_mem_->set_data_handle(static_cast(dst_data)); + fwd_stream_->submit(fwd_primitives_); - return; - } + // after exec, set data handle back + src_mem_->set_data_handle(DummyData); + filter_mem_->set_data_handle(DummyData); + dst_mem_->set_data_handle(DummyData); - memory::format GetSrcMemoryFormat() const { - return context_.src_fmt; + return; } - memory::format GetFilterMemoryFormat() const { - return context_.filter_fmt; - } + // expected memory format for this primitive instance + memory::format src_fmt_; + memory::format filter_fmt_; - std::shared_ptr - GetPrimitiveDesc() const { - return context_.fwd_pd; - } + // convolution primitive + std::shared_ptr fwd_pd_; + std::shared_ptr conv_fwd_; private: - // Primitive reuse context for Conv2D Fwd op - struct ConvFwdContext { - // expected memory format for this primitive instance - memory::format src_fmt; - memory::format filter_fmt; - - // MKLDNN memory - std::shared_ptr src_mem; - std::shared_ptr filter_mem; - std::shared_ptr bias_mem; - std::shared_ptr dst_mem; - - // desc & prmitive desc - std::shared_ptr fwd_desc; - - // memory desc - std::shared_ptr src_md; - std::shared_ptr filter_md; - std::shared_ptr bias_md; - std::shared_ptr dst_md; - - // convolution primitive - std::shared_ptr fwd_pd; - std::shared_ptr conv_fwd; - - std::shared_ptr fwd_stream; - std::vector fwd_primitives; - - ConvFwdContext() : - src_fmt(memory::format::any), filter_fmt(memory::format::any), - src_mem(nullptr), filter_mem(nullptr), bias_mem(nullptr), - dst_mem(nullptr), fwd_desc(nullptr), - src_md(nullptr), filter_md(nullptr), bias_md(nullptr), - fwd_pd(nullptr), conv_fwd(nullptr), fwd_stream(nullptr) { - } - } context_; - - engine cpu_engine_ = engine(engine::cpu, 0); - - void Setup(const MklConvFwdParams& convFwdDims) { + void Setup(const ConvFwdDimensions& convFwdDims) { // create memory descriptors for convolution data w/ no specified format - context_.src_md.reset(new 
memory::desc({convFwdDims.src_dims}, + src_md_.reset(new memory::desc({convFwdDims.src_dims}, MklDnnType(), memory::format::any)); - context_.filter_md.reset(new memory::desc({convFwdDims.filter_dims}, + filter_md_.reset(new memory::desc({convFwdDims.filter_dims}, MklDnnType(), memory::format::any)); - context_.dst_md.reset(new memory::desc({convFwdDims.dst_dims}, + dst_md_.reset(new memory::desc({convFwdDims.dst_dims}, MklDnnType(), memory::format::any)); if (!convFwdDims.bias_dims.empty()) - context_.bias_md.reset(new memory::desc({convFwdDims.bias_dims}, + bias_md_.reset(new memory::desc({convFwdDims.bias_dims}, MklDnnType(), memory::format::any)); // create a convolution if (!convFwdDims.bias_dims.empty()) { - context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *context_.src_md, *context_.filter_md, - *context_.bias_md, *context_.dst_md, + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_, convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, convFwdDims.padding_right, padding_kind::zero)); } else { - context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *context_.src_md, *context_.filter_md, - *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, - convFwdDims.padding_left, convFwdDims.padding_right, - padding_kind::zero)); + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *filter_md_, *dst_md_, + convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, + convFwdDims.padding_right, padding_kind::zero)); } - context_.fwd_pd.reset(new convolution_forward::primitive_desc( - *context_.fwd_desc, cpu_engine_)); + fwd_pd_.reset(new convolution_forward::primitive_desc( + *fwd_desc_, cpu_engine_)); // store the expected memory format - context_.src_fmt = static_cast( - 
context_.fwd_pd.get()->src_primitive_desc().desc().data.format); + src_fmt_ = static_cast( + fwd_pd_.get()->src_primitive_desc().desc().data.format); - context_.filter_fmt = static_cast( - context_.fwd_pd.get()->weights_primitive_desc().desc().data.format); + filter_fmt_ = static_cast( + fwd_pd_.get()->weights_primitive_desc().desc().data.format); // create memory primitive based on dummy data - context_.src_mem.reset(new memory( - context_.fwd_pd.get()->src_primitive_desc(), DummyData)); - context_.filter_mem.reset(new memory( - context_.fwd_pd.get()->weights_primitive_desc(), DummyData)); - context_.dst_mem.reset(new memory( - context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); + src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData)); + filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), + DummyData)); + dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData)); // create convolution primitive and add it to net if (!convFwdDims.bias_dims.empty()) { - context_.bias_mem.reset(new memory({{{convFwdDims.bias_dims}, - MklDnnType(), memory::format::x}, cpu_engine_}, DummyData)); - context_.conv_fwd.reset(new convolution_forward( - *context_.fwd_pd, *context_.src_mem, *context_.filter_mem, - *context_.bias_mem, *context_.dst_mem)); + bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType(), + memory::format::x}, cpu_engine_}, DummyData)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *filter_mem_, *bias_mem_, *dst_mem_)); } else { - context_.conv_fwd.reset(new convolution_forward( - *context_.fwd_pd, *context_.src_mem, - *context_.filter_mem, *context_.dst_mem)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *filter_mem_, *dst_mem_)); } - context_.fwd_primitives.push_back(*context_.conv_fwd); + fwd_primitives_.push_back(*conv_fwd_); return; } + + // MKLDNN memory + std::shared_ptr src_mem_; + std::shared_ptr filter_mem_; + std::shared_ptr bias_mem_; + 
std::shared_ptr dst_mem_; + + std::shared_ptr fwd_stream_; + std::vector fwd_primitives_; + + // desc & prmitive desc + std::shared_ptr fwd_desc_; + + // memory desc + std::shared_ptr src_md_; + std::shared_ptr filter_md_; + std::shared_ptr bias_md_; + std::shared_ptr dst_md_; + + engine cpu_engine_ = engine(engine::cpu, 0); }; template -class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { +class Conv2DFwdFactory : public DnnOpFactory { public: - static MklConv2DFwdPrimitive* Get(const MklConvFwdParams& convFwdDims) { - MklConv2DFwdPrimitive* conv2d_fwd = nullptr; + static Conv2DFwd* Get(const ConvFwdDimensions& convFwdDims) { + Conv2DFwd* conv2d_fwd = nullptr; // try to find a suitable one in pool - conv2d_fwd = dynamic_cast*> ( - MklConv2DFwdPrimitiveFactory::GetInstance().GetConv2DFwd( - convFwdDims)); + conv2d_fwd = dynamic_cast*> ( + Conv2DFwdFactory::GetInstance().GetConv2DFwd(convFwdDims)); if (conv2d_fwd == nullptr) { - conv2d_fwd = new MklConv2DFwdPrimitive(convFwdDims); - MklConv2DFwdPrimitiveFactory::GetInstance().SetConv2DFwd( + conv2d_fwd = new Conv2DFwd(convFwdDims); + Conv2DFwdFactory::GetInstance().SetConv2DFwd( convFwdDims, conv2d_fwd); } return conv2d_fwd; } private: - MklConv2DFwdPrimitiveFactory() {} - ~MklConv2DFwdPrimitiveFactory() {} + Conv2DFwdFactory() {} + ~Conv2DFwdFactory() {} static const int kDilationH = 0, kDilationW = 1; - static MklConv2DFwdPrimitiveFactory& GetInstance() { - static MklConv2DFwdPrimitiveFactory instance_; + static Conv2DFwdFactory& GetInstance() { + static Conv2DFwdFactory instance_; return instance_; } - static std::string CreateKey(const MklConvFwdParams& convFwdDims) { + static std::string CreateKey(const ConvFwdDimensions& convFwdDims) { std::string prefix = "conv2d_fwd_"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -307,12 +266,12 @@ class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { return key_creator.GetKey(); } - MklPrimitive* GetConv2DFwd(const 
MklConvFwdParams& convFwdDims) { + DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) { std::string key = CreateKey(convFwdDims); return this->GetOp(key); } - void SetConv2DFwd(const MklConvFwdParams& convFwdDims, MklPrimitive *op) { + void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) { std::string key = CreateKey(convFwdDims); this->SetOp(key, op); } @@ -803,6 +762,7 @@ class MklConv2DOp : public OpKernel { MklDnnData src(&cpu_engine); MklDnnData filter(&cpu_engine); + MklDnnData dst(&cpu_engine); // output memory::dims src_dims, filter_dims, padding_left, padding_right, dilations, strides; @@ -852,6 +812,7 @@ class MklConv2DOp : public OpKernel { auto src_md = src_mkl_shape.IsMklTensor() ? src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), tf_fmt); + src.SetUsrMem(src_md, &src_tensor); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). @@ -859,28 +820,29 @@ class MklConv2DOp : public OpKernel { ? filter_mkl_shape.GetMklLayout() : memory::desc(filter_dims, MklDnnType(), memory::format::hwio); + filter.SetUsrMem(filter_md, &filter_tensor); // MKLDNN dilation starts from 0. 
dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; // get a conv2d fwd from primitive pool - MklConv2DFwdPrimitive *conv2d_fwd = nullptr; + Conv2DFwd *conv2d_fwd = nullptr; if (biasEnabled) { memory::dims bias_dims = {}; conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); - MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims, + ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); + conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); } else { - MklConvFwdParams convFwdDims(src_dims, filter_dims, NONE_DIMS, + ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); + conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); } // allocate output tensors output_tensor and filter_out_tensor std::shared_ptr - conv_fwd_pd = conv2d_fwd->GetPrimitiveDesc(); + conv_fwd_pd = conv2d_fwd->fwd_pd_; AllocateOutputTensor(context, *conv_fwd_pd, dst_dims_mkl_order, tf_fmt, &dst_tensor); Tensor* filter_out_tensor = nullptr; @@ -892,30 +854,20 @@ class MklConv2DOp : public OpKernel { // check whether src/filter need reorder std::vector net; - T *src_data = nullptr; - if (src_md.data.format != conv2d_fwd->GetSrcMemoryFormat()) { - src.SetUsrMem(src_md, &src_tensor); - src.CheckReorderToOpMem( - conv_fwd_pd.get()->src_primitive_desc(), &net); - src_data = static_cast(src.GetOpMem().get_data_handle()); - } else { - src_data = static_cast(const_cast( - src_tensor.flat().data())); - } - T *filter_data = nullptr; - if (filter_md.data.format != conv2d_fwd->GetFilterMemoryFormat()) { - filter.SetUsrMem(filter_md, &filter_tensor); - filter.CheckReorderToOpMem( - conv_fwd_pd.get()->weights_primitive_desc(), - filter.GetTensorBuffer(filter_out_tensor), &net); - filter_data = 
static_cast(filter.GetOpMem().get_data_handle()); - } else { - filter_data = static_cast(const_cast( - filter_tensor.flat().data())); - } - + if (src_md.data.format != conv2d_fwd->src_fmt_) + src.CheckReorderToOpMem( + conv_fwd_pd.get()->src_primitive_desc(), &net); + + if (filter_md.data.format != conv2d_fwd->filter_fmt_) + filter.CheckReorderToOpMem( + conv_fwd_pd.get()->weights_primitive_desc(), + filter.GetTensorBuffer(filter_out_tensor), &net); stream(stream::kind::eager).submit(net).wait(); + T* src_data = static_cast( + src.GetOpMem().get_data_handle()); + T* filter_data = static_cast( + filter.GetOpMem().get_data_handle()); // execute convolution if (biasEnabled) { -- GitLab From ede9ff1875c4b16555246b0f54cd8502a78174d8 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Mon, 21 May 2018 13:43:42 -0700 Subject: [PATCH 0020/2038] revert mkl_conv_ops.cc to avoid PR review confusion --- tensorflow/core/kernels/mkl_conv_ops.cc | 280 ++++++++++-------------- 1 file changed, 116 insertions(+), 164 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index c032add82e..f2b14f1278 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -59,8 +59,7 @@ namespace tensorflow { #ifndef INTEL_MKL_ML -// This structure aggregates multiple inputs to Conv2DFwd* methods. 
-struct MklConvFwdParams { +struct ConvFwdDimensions { memory::dims src_dims; memory::dims filter_dims; memory::dims bias_dims; @@ -70,7 +69,7 @@ struct MklConvFwdParams { memory::dims padding_left; memory::dims padding_right; - MklConvFwdParams(memory::dims src_dims, + ConvFwdDimensions(memory::dims src_dims, memory::dims filter_dims, memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, @@ -83,40 +82,35 @@ struct MklConvFwdParams { }; template -class MklConv2DFwdPrimitive: public MklPrimitive { +class Conv2DFwd : public DnnOp { public: - explicit MklConv2DFwdPrimitive(const MklConvFwdParams& convFwdDims) { - context_.fwd_stream.reset(new stream(stream::kind::eager)); + explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) { + fwd_stream_.reset(new stream(stream::kind::eager)); // create conv primitive - if (context_.conv_fwd == nullptr) { + if (conv_fwd_ == nullptr) { Setup(convFwdDims); } } - ~MklConv2DFwdPrimitive() {} + ~Conv2DFwd() {} // Convolution forward execute with bias // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // bias_data: input data buffer of bias // dst_data: output data buffer of dst - void Execute(const T* src_data, const T* filter_data, - const T* bias_data, const T* dst_data) { - context_.src_mem->set_data_handle( - static_cast(const_cast(src_data))); - context_.filter_mem->set_data_handle( - static_cast(const_cast(filter_data))); - context_.bias_mem->set_data_handle( - static_cast(const_cast(bias_data))); - context_.dst_mem->set_data_handle( - static_cast(const_cast(dst_data))); - context_.fwd_stream->submit(context_.fwd_primitives); + void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) { + src_mem_->set_data_handle(static_cast(src_data)); + filter_mem_->set_data_handle(static_cast(filter_data)); + bias_mem_->set_data_handle(static_cast(bias_data)); + dst_mem_->set_data_handle(static_cast(dst_data)); + 
fwd_stream_->submit(fwd_primitives_); // after exec, set data handle back - context_.src_mem->set_data_handle(DummyData); - context_.filter_mem->set_data_handle(DummyData); - context_.bias_mem->set_data_handle(DummyData); - context_.dst_mem->set_data_handle(DummyData); + src_mem_->set_data_handle(DummyData); + filter_mem_->set_data_handle(DummyData); + bias_mem_->set_data_handle(DummyData); + dst_mem_->set_data_handle(DummyData); return; } @@ -125,174 +119,139 @@ class MklConv2DFwdPrimitive: public MklPrimitive { // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // dst_data: output data buffer of dst - void Execute(const T* src_data, const T* filter_data, - const T* dst_data) { - context_.src_mem->set_data_handle( - static_cast(const_cast(src_data))); - context_.filter_mem->set_data_handle( - static_cast(const_cast(filter_data))); - context_.dst_mem->set_data_handle( - static_cast(const_cast(dst_data))); - context_.fwd_stream->submit(context_.fwd_primitives); - - // after execution, set data handle back - context_.src_mem->set_data_handle(DummyData); - context_.filter_mem->set_data_handle(DummyData); - context_.dst_mem->set_data_handle(DummyData); + void Execute(T* src_data, T* filter_data, T* dst_data) { + src_mem_->set_data_handle(static_cast(src_data)); + filter_mem_->set_data_handle(static_cast(filter_data)); + dst_mem_->set_data_handle(static_cast(dst_data)); + fwd_stream_->submit(fwd_primitives_); - return; - } + // after exec, set data handle back + src_mem_->set_data_handle(DummyData); + filter_mem_->set_data_handle(DummyData); + dst_mem_->set_data_handle(DummyData); - memory::format GetSrcMemoryFormat() const { - return context_.src_fmt; + return; } - memory::format GetFilterMemoryFormat() const { - return context_.filter_fmt; - } + // expected memory format for this primitive instance + memory::format src_fmt_; + memory::format filter_fmt_; - std::shared_ptr - GetPrimitiveDesc() const { - return context_.fwd_pd; - 
} + // convolution primitive + std::shared_ptr fwd_pd_; + std::shared_ptr conv_fwd_; private: - // Primitive reuse context for Conv2D Fwd op - struct ConvFwdContext { - // expected memory format for this primitive instance - memory::format src_fmt; - memory::format filter_fmt; - - // MKLDNN memory - std::shared_ptr src_mem; - std::shared_ptr filter_mem; - std::shared_ptr bias_mem; - std::shared_ptr dst_mem; - - // desc & prmitive desc - std::shared_ptr fwd_desc; - - // memory desc - std::shared_ptr src_md; - std::shared_ptr filter_md; - std::shared_ptr bias_md; - std::shared_ptr dst_md; - - // convolution primitive - std::shared_ptr fwd_pd; - std::shared_ptr conv_fwd; - - std::shared_ptr fwd_stream; - std::vector fwd_primitives; - - ConvFwdContext() : - src_fmt(memory::format::any), filter_fmt(memory::format::any), - src_mem(nullptr), filter_mem(nullptr), bias_mem(nullptr), - dst_mem(nullptr), fwd_desc(nullptr), - src_md(nullptr), filter_md(nullptr), bias_md(nullptr), - fwd_pd(nullptr), conv_fwd(nullptr), fwd_stream(nullptr) { - } - } context_; - - engine cpu_engine_ = engine(engine::cpu, 0); - - void Setup(const MklConvFwdParams& convFwdDims) { + void Setup(const ConvFwdDimensions& convFwdDims) { // create memory descriptors for convolution data w/ no specified format - context_.src_md.reset(new memory::desc({convFwdDims.src_dims}, + src_md_.reset(new memory::desc({convFwdDims.src_dims}, MklDnnType(), memory::format::any)); - context_.filter_md.reset(new memory::desc({convFwdDims.filter_dims}, + filter_md_.reset(new memory::desc({convFwdDims.filter_dims}, MklDnnType(), memory::format::any)); - context_.dst_md.reset(new memory::desc({convFwdDims.dst_dims}, + dst_md_.reset(new memory::desc({convFwdDims.dst_dims}, MklDnnType(), memory::format::any)); if (!convFwdDims.bias_dims.empty()) - context_.bias_md.reset(new memory::desc({convFwdDims.bias_dims}, + bias_md_.reset(new memory::desc({convFwdDims.bias_dims}, MklDnnType(), memory::format::any)); // create a 
convolution if (!convFwdDims.bias_dims.empty()) { - context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *context_.src_md, *context_.filter_md, - *context_.bias_md, *context_.dst_md, + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_, convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, convFwdDims.padding_right, padding_kind::zero)); } else { - context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *context_.src_md, *context_.filter_md, - *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, - convFwdDims.padding_left, convFwdDims.padding_right, - padding_kind::zero)); + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *filter_md_, *dst_md_, + convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, + convFwdDims.padding_right, padding_kind::zero)); } - context_.fwd_pd.reset(new convolution_forward::primitive_desc( - *context_.fwd_desc, cpu_engine_)); + fwd_pd_.reset(new convolution_forward::primitive_desc( + *fwd_desc_, cpu_engine_)); // store the expected memory format - context_.src_fmt = static_cast( - context_.fwd_pd.get()->src_primitive_desc().desc().data.format); + src_fmt_ = static_cast( + fwd_pd_.get()->src_primitive_desc().desc().data.format); - context_.filter_fmt = static_cast( - context_.fwd_pd.get()->weights_primitive_desc().desc().data.format); + filter_fmt_ = static_cast( + fwd_pd_.get()->weights_primitive_desc().desc().data.format); // create memory primitive based on dummy data - context_.src_mem.reset(new memory( - context_.fwd_pd.get()->src_primitive_desc(), DummyData)); - context_.filter_mem.reset(new memory( - context_.fwd_pd.get()->weights_primitive_desc(), DummyData)); - context_.dst_mem.reset(new memory( - context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); + src_mem_.reset(new 
memory(fwd_pd_.get()->src_primitive_desc(), DummyData)); + filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), + DummyData)); + dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData)); // create convolution primitive and add it to net if (!convFwdDims.bias_dims.empty()) { - context_.bias_mem.reset(new memory({{{convFwdDims.bias_dims}, - MklDnnType(), memory::format::x}, cpu_engine_}, DummyData)); - context_.conv_fwd.reset(new convolution_forward( - *context_.fwd_pd, *context_.src_mem, *context_.filter_mem, - *context_.bias_mem, *context_.dst_mem)); + bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType(), + memory::format::x}, cpu_engine_}, DummyData)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *filter_mem_, *bias_mem_, *dst_mem_)); } else { - context_.conv_fwd.reset(new convolution_forward( - *context_.fwd_pd, *context_.src_mem, - *context_.filter_mem, *context_.dst_mem)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *filter_mem_, *dst_mem_)); } - context_.fwd_primitives.push_back(*context_.conv_fwd); + fwd_primitives_.push_back(*conv_fwd_); return; } + + // MKLDNN memory + std::shared_ptr src_mem_; + std::shared_ptr filter_mem_; + std::shared_ptr bias_mem_; + std::shared_ptr dst_mem_; + + std::shared_ptr fwd_stream_; + std::vector fwd_primitives_; + + // desc & prmitive desc + std::shared_ptr fwd_desc_; + + // memory desc + std::shared_ptr src_md_; + std::shared_ptr filter_md_; + std::shared_ptr bias_md_; + std::shared_ptr dst_md_; + + engine cpu_engine_ = engine(engine::cpu, 0); }; template -class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { +class Conv2DFwdFactory : public DnnOpFactory { public: - static MklConv2DFwdPrimitive* Get(const MklConvFwdParams& convFwdDims) { - MklConv2DFwdPrimitive* conv2d_fwd = nullptr; + static Conv2DFwd* Get(const ConvFwdDimensions& convFwdDims) { + Conv2DFwd* conv2d_fwd = nullptr; // try to find a suitable one in pool - 
conv2d_fwd = dynamic_cast*> ( - MklConv2DFwdPrimitiveFactory::GetInstance().GetConv2DFwd( - convFwdDims)); + conv2d_fwd = dynamic_cast*> ( + Conv2DFwdFactory::GetInstance().GetConv2DFwd(convFwdDims)); if (conv2d_fwd == nullptr) { - conv2d_fwd = new MklConv2DFwdPrimitive(convFwdDims); - MklConv2DFwdPrimitiveFactory::GetInstance().SetConv2DFwd( + conv2d_fwd = new Conv2DFwd(convFwdDims); + Conv2DFwdFactory::GetInstance().SetConv2DFwd( convFwdDims, conv2d_fwd); } return conv2d_fwd; } private: - MklConv2DFwdPrimitiveFactory() {} - ~MklConv2DFwdPrimitiveFactory() {} + Conv2DFwdFactory() {} + ~Conv2DFwdFactory() {} static const int kDilationH = 0, kDilationW = 1; - static MklConv2DFwdPrimitiveFactory& GetInstance() { - static MklConv2DFwdPrimitiveFactory instance_; + static Conv2DFwdFactory& GetInstance() { + static Conv2DFwdFactory instance_; return instance_; } - static std::string CreateKey(const MklConvFwdParams& convFwdDims) { + static std::string CreateKey(const ConvFwdDimensions& convFwdDims) { std::string prefix = "conv2d_fwd_"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -307,12 +266,12 @@ class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { return key_creator.GetKey(); } - MklPrimitive* GetConv2DFwd(const MklConvFwdParams& convFwdDims) { + DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) { std::string key = CreateKey(convFwdDims); return this->GetOp(key); } - void SetConv2DFwd(const MklConvFwdParams& convFwdDims, MklPrimitive *op) { + void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) { std::string key = CreateKey(convFwdDims); this->SetOp(key, op); } @@ -803,6 +762,7 @@ class MklConv2DOp : public OpKernel { MklDnnData src(&cpu_engine); MklDnnData filter(&cpu_engine); + MklDnnData dst(&cpu_engine); // output memory::dims src_dims, filter_dims, padding_left, padding_right, dilations, strides; @@ -852,6 +812,7 @@ class MklConv2DOp : public OpKernel { auto src_md = src_mkl_shape.IsMklTensor() ? 
src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), tf_fmt); + src.SetUsrMem(src_md, &src_tensor); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). @@ -859,28 +820,29 @@ class MklConv2DOp : public OpKernel { ? filter_mkl_shape.GetMklLayout() : memory::desc(filter_dims, MklDnnType(), memory::format::hwio); + filter.SetUsrMem(filter_md, &filter_tensor); // MKLDNN dilation starts from 0. dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; // get a conv2d fwd from primitive pool - MklConv2DFwdPrimitive *conv2d_fwd = nullptr; + Conv2DFwd *conv2d_fwd = nullptr; if (biasEnabled) { memory::dims bias_dims = {}; conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); - MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims, + ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); + conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); } else { - MklConvFwdParams convFwdDims(src_dims, filter_dims, NONE_DIMS, + ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); + conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); } // allocate output tensors output_tensor and filter_out_tensor std::shared_ptr - conv_fwd_pd = conv2d_fwd->GetPrimitiveDesc(); + conv_fwd_pd = conv2d_fwd->fwd_pd_; AllocateOutputTensor(context, *conv_fwd_pd, dst_dims_mkl_order, tf_fmt, &dst_tensor); Tensor* filter_out_tensor = nullptr; @@ -892,30 +854,20 @@ class MklConv2DOp : public OpKernel { // check whether src/filter need reorder std::vector net; - T *src_data = nullptr; - if (src_md.data.format != conv2d_fwd->GetSrcMemoryFormat()) { - src.SetUsrMem(src_md, &src_tensor); - src.CheckReorderToOpMem( - conv_fwd_pd.get()->src_primitive_desc(), 
&net); - src_data = static_cast(src.GetOpMem().get_data_handle()); - } else { - src_data = static_cast(const_cast( - src_tensor.flat().data())); - } - T *filter_data = nullptr; - if (filter_md.data.format != conv2d_fwd->GetFilterMemoryFormat()) { - filter.SetUsrMem(filter_md, &filter_tensor); - filter.CheckReorderToOpMem( - conv_fwd_pd.get()->weights_primitive_desc(), - filter.GetTensorBuffer(filter_out_tensor), &net); - filter_data = static_cast(filter.GetOpMem().get_data_handle()); - } else { - filter_data = static_cast(const_cast( - filter_tensor.flat().data())); - } - + if (src_md.data.format != conv2d_fwd->src_fmt_) + src.CheckReorderToOpMem( + conv_fwd_pd.get()->src_primitive_desc(), &net); + + if (filter_md.data.format != conv2d_fwd->filter_fmt_) + filter.CheckReorderToOpMem( + conv_fwd_pd.get()->weights_primitive_desc(), + filter.GetTensorBuffer(filter_out_tensor), &net); stream(stream::kind::eager).submit(net).wait(); + T* src_data = static_cast( + src.GetOpMem().get_data_handle()); + T* filter_data = static_cast( + filter.GetOpMem().get_data_handle()); // execute convolution if (biasEnabled) { -- GitLab From e84a1cb522f868257284cd440840bcbd81cbea78 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 22 May 2018 21:52:48 -0700 Subject: [PATCH 0021/2038] minor code style fix --- tensorflow/core/util/mkl_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index f19307756f..c4b5e124fb 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1794,7 +1794,7 @@ class MklDnnData { } }; -/// Base class for operations with reuse of primitives +/// Base class for operations with reuse of primitives /// class MklPrimitive { public: -- GitLab From 52485e7b66c2b44174f76394937ea90bff898ac4 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 22 May 2018 21:58:18 -0700 Subject: [PATCH 0022/2038] minor code style fix --- 
tensorflow/core/util/mkl_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 5dd2ee4521..b382065e4b 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1804,7 +1804,7 @@ class MklDnnData { } }; -/// Base class for operations with reuse of primitives +/// Base class for operations with reuse of primitives /// class MklPrimitive { public: -- GitLab From 834f0fabf73bae7c3abc0ab0a37296dfe2848298 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 22 May 2018 22:00:47 -0700 Subject: [PATCH 0023/2038] minor code style fix --- tensorflow/core/util/mkl_util.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index dde71b4116..b382065e4b 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1804,7 +1804,7 @@ class MklDnnData { } }; -/// Base class for operations with reuse of primitives +/// Base class for operations with reuse of primitives /// class MklPrimitive { public: @@ -1890,12 +1890,12 @@ class FactoryKeyCreator { static inline memory::format get_desired_format(int channel) { memory::format fmt_desired = memory::format::any; - if (port::TestCPUFeature(port::CPUFeature::AVX512F) && - (channel % 16) == 0) { - fmt_desired = memory::format::nChw16c; - } else if (port::TestCPUFeature(port::CPUFeature::AVX2) && - (channel % 8) == 0) { - fmt_desired = memory::format::nChw8c; + if (port::TestCPUFeature(port::CPUFeature::AVX512F) + && (channel % 16) == 0) { + fmt_desired = memory::format::nChw16c; + } else if (port::TestCPUFeature(port::CPUFeature::AVX2) + && (channel % 8) == 0) { + fmt_desired = memory::format::nChw8c; } else { fmt_desired = memory::format::nchw; } -- GitLab From 8f3635d15438af7c8f6f047a5999352d14901db7 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 17 May 2018 17:50:20 +0000 Subject: [PATCH 
0024/2038] Improve fast_tensor_util for bfloat16 In 19180, improvement has been done to speed up the fast_tensor_util for `float16`. As both `float16` and `bfloat16` use the same size, `bfloat16` could be improved as well. This fix speeds up `bfloat16` in a similar fashion as `float16`. This fix is related to 19180. Signed-off-by: Yong Tang --- tensorflow/python/framework/fast_tensor_util.pyx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/python/framework/fast_tensor_util.pyx b/tensorflow/python/framework/fast_tensor_util.pyx index 17d112a1ec..2e3e15f53a 100644 --- a/tensorflow/python/framework/fast_tensor_util.pyx +++ b/tensorflow/python/framework/fast_tensor_util.pyx @@ -6,6 +6,13 @@ cimport numpy as np from tensorflow.python.util import compat +def AppendBFloat16ArrayToTensorProto( + tensor_proto, np.ndarray[np.uint16_t, ndim=1] nparray): + cdef long i, n + n = nparray.size + for i in range(n): + tensor_proto.half_val.append(nparray[i]) + def AppendFloat16ArrayToTensorProto( # For numpy, npy_half is a typedef for npy_uint16, -- GitLab From 6d5f5efadc6047828e79e0bdb4af133e0269faa7 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 17 May 2018 17:53:07 +0000 Subject: [PATCH 0025/2038] Update tensorflow/python/framework/tensor_util.py Signed-off-by: Yong Tang --- tensorflow/python/framework/tensor_util.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index ca63efbc84..ebb1db534c 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -67,10 +67,14 @@ def SlowAppendBFloat16ArrayToTensorProto(tensor_proto, proto_values): [ExtractBitsFromBFloat16(x) for x in proto_values]) +def FastAppendBFloat16ArrayToTensorProto(tensor_proto, proto_values): + fast_tensor_util.AppendBFloat16ArrayToTensorProto(tensor_proto, np.asarray(proto_values, 
dtype=dtypes.bfloat16.as_numpy_dtype).view(np.uint16)) + + if _FAST_TENSOR_UTIL_AVAILABLE: _NP_TO_APPEND_FN = { dtypes.bfloat16.as_numpy_dtype: - SlowAppendBFloat16ArrayToTensorProto, + FastAppendBFloat16ArrayToTensorProto, np.float16: _MediumAppendFloat16ArrayToTensorProto, np.float32: -- GitLab From 535fa4919c9e247e2df673d8af874c3a39a38976 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 17 May 2018 17:55:10 +0000 Subject: [PATCH 0026/2038] Pylint fix. Signed-off-by: Yong Tang --- tensorflow/python/framework/tensor_util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index ebb1db534c..e229d6105e 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -68,7 +68,9 @@ def SlowAppendBFloat16ArrayToTensorProto(tensor_proto, proto_values): def FastAppendBFloat16ArrayToTensorProto(tensor_proto, proto_values): - fast_tensor_util.AppendBFloat16ArrayToTensorProto(tensor_proto, np.asarray(proto_values, dtype=dtypes.bfloat16.as_numpy_dtype).view(np.uint16)) + fast_tensor_util.AppendBFloat16ArrayToTensorProto( + tensor_proto, np.asarray( + proto_values, dtype=dtypes.bfloat16.as_numpy_dtype).view(np.uint16)) if _FAST_TENSOR_UTIL_AVAILABLE: -- GitLab From 327bea444a2f8c58e8844561dd20abb88032cd89 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Mon, 4 Jun 2018 09:39:54 -0700 Subject: [PATCH 0027/2038] revert changes with mkl_conv_ops.cc and mkl_util.h --- tensorflow/core/kernels/mkl_conv_ops.cc | 280 ++++++++++-------------- tensorflow/core/util/mkl_util.h | 32 +-- 2 files changed, 132 insertions(+), 180 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index c032add82e..f2b14f1278 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -59,8 +59,7 @@ namespace tensorflow { #ifndef INTEL_MKL_ML -// This 
structure aggregates multiple inputs to Conv2DFwd* methods. -struct MklConvFwdParams { +struct ConvFwdDimensions { memory::dims src_dims; memory::dims filter_dims; memory::dims bias_dims; @@ -70,7 +69,7 @@ struct MklConvFwdParams { memory::dims padding_left; memory::dims padding_right; - MklConvFwdParams(memory::dims src_dims, + ConvFwdDimensions(memory::dims src_dims, memory::dims filter_dims, memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, @@ -83,40 +82,35 @@ struct MklConvFwdParams { }; template -class MklConv2DFwdPrimitive: public MklPrimitive { +class Conv2DFwd : public DnnOp { public: - explicit MklConv2DFwdPrimitive(const MklConvFwdParams& convFwdDims) { - context_.fwd_stream.reset(new stream(stream::kind::eager)); + explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) { + fwd_stream_.reset(new stream(stream::kind::eager)); // create conv primitive - if (context_.conv_fwd == nullptr) { + if (conv_fwd_ == nullptr) { Setup(convFwdDims); } } - ~MklConv2DFwdPrimitive() {} + ~Conv2DFwd() {} // Convolution forward execute with bias // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // bias_data: input data buffer of bias // dst_data: output data buffer of dst - void Execute(const T* src_data, const T* filter_data, - const T* bias_data, const T* dst_data) { - context_.src_mem->set_data_handle( - static_cast(const_cast(src_data))); - context_.filter_mem->set_data_handle( - static_cast(const_cast(filter_data))); - context_.bias_mem->set_data_handle( - static_cast(const_cast(bias_data))); - context_.dst_mem->set_data_handle( - static_cast(const_cast(dst_data))); - context_.fwd_stream->submit(context_.fwd_primitives); + void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) { + src_mem_->set_data_handle(static_cast(src_data)); + filter_mem_->set_data_handle(static_cast(filter_data)); + bias_mem_->set_data_handle(static_cast(bias_data)); + 
dst_mem_->set_data_handle(static_cast(dst_data)); + fwd_stream_->submit(fwd_primitives_); // after exec, set data handle back - context_.src_mem->set_data_handle(DummyData); - context_.filter_mem->set_data_handle(DummyData); - context_.bias_mem->set_data_handle(DummyData); - context_.dst_mem->set_data_handle(DummyData); + src_mem_->set_data_handle(DummyData); + filter_mem_->set_data_handle(DummyData); + bias_mem_->set_data_handle(DummyData); + dst_mem_->set_data_handle(DummyData); return; } @@ -125,174 +119,139 @@ class MklConv2DFwdPrimitive: public MklPrimitive { // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // dst_data: output data buffer of dst - void Execute(const T* src_data, const T* filter_data, - const T* dst_data) { - context_.src_mem->set_data_handle( - static_cast(const_cast(src_data))); - context_.filter_mem->set_data_handle( - static_cast(const_cast(filter_data))); - context_.dst_mem->set_data_handle( - static_cast(const_cast(dst_data))); - context_.fwd_stream->submit(context_.fwd_primitives); - - // after execution, set data handle back - context_.src_mem->set_data_handle(DummyData); - context_.filter_mem->set_data_handle(DummyData); - context_.dst_mem->set_data_handle(DummyData); + void Execute(T* src_data, T* filter_data, T* dst_data) { + src_mem_->set_data_handle(static_cast(src_data)); + filter_mem_->set_data_handle(static_cast(filter_data)); + dst_mem_->set_data_handle(static_cast(dst_data)); + fwd_stream_->submit(fwd_primitives_); - return; - } + // after exec, set data handle back + src_mem_->set_data_handle(DummyData); + filter_mem_->set_data_handle(DummyData); + dst_mem_->set_data_handle(DummyData); - memory::format GetSrcMemoryFormat() const { - return context_.src_fmt; + return; } - memory::format GetFilterMemoryFormat() const { - return context_.filter_fmt; - } + // expected memory format for this primitive instance + memory::format src_fmt_; + memory::format filter_fmt_; - std::shared_ptr - 
GetPrimitiveDesc() const { - return context_.fwd_pd; - } + // convolution primitive + std::shared_ptr fwd_pd_; + std::shared_ptr conv_fwd_; private: - // Primitive reuse context for Conv2D Fwd op - struct ConvFwdContext { - // expected memory format for this primitive instance - memory::format src_fmt; - memory::format filter_fmt; - - // MKLDNN memory - std::shared_ptr src_mem; - std::shared_ptr filter_mem; - std::shared_ptr bias_mem; - std::shared_ptr dst_mem; - - // desc & prmitive desc - std::shared_ptr fwd_desc; - - // memory desc - std::shared_ptr src_md; - std::shared_ptr filter_md; - std::shared_ptr bias_md; - std::shared_ptr dst_md; - - // convolution primitive - std::shared_ptr fwd_pd; - std::shared_ptr conv_fwd; - - std::shared_ptr fwd_stream; - std::vector fwd_primitives; - - ConvFwdContext() : - src_fmt(memory::format::any), filter_fmt(memory::format::any), - src_mem(nullptr), filter_mem(nullptr), bias_mem(nullptr), - dst_mem(nullptr), fwd_desc(nullptr), - src_md(nullptr), filter_md(nullptr), bias_md(nullptr), - fwd_pd(nullptr), conv_fwd(nullptr), fwd_stream(nullptr) { - } - } context_; - - engine cpu_engine_ = engine(engine::cpu, 0); - - void Setup(const MklConvFwdParams& convFwdDims) { + void Setup(const ConvFwdDimensions& convFwdDims) { // create memory descriptors for convolution data w/ no specified format - context_.src_md.reset(new memory::desc({convFwdDims.src_dims}, + src_md_.reset(new memory::desc({convFwdDims.src_dims}, MklDnnType(), memory::format::any)); - context_.filter_md.reset(new memory::desc({convFwdDims.filter_dims}, + filter_md_.reset(new memory::desc({convFwdDims.filter_dims}, MklDnnType(), memory::format::any)); - context_.dst_md.reset(new memory::desc({convFwdDims.dst_dims}, + dst_md_.reset(new memory::desc({convFwdDims.dst_dims}, MklDnnType(), memory::format::any)); if (!convFwdDims.bias_dims.empty()) - context_.bias_md.reset(new memory::desc({convFwdDims.bias_dims}, + bias_md_.reset(new memory::desc({convFwdDims.bias_dims}, 
MklDnnType(), memory::format::any)); // create a convolution if (!convFwdDims.bias_dims.empty()) { - context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *context_.src_md, *context_.filter_md, - *context_.bias_md, *context_.dst_md, + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_, convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, convFwdDims.padding_right, padding_kind::zero)); } else { - context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *context_.src_md, *context_.filter_md, - *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, - convFwdDims.padding_left, convFwdDims.padding_right, - padding_kind::zero)); + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *filter_md_, *dst_md_, + convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, + convFwdDims.padding_right, padding_kind::zero)); } - context_.fwd_pd.reset(new convolution_forward::primitive_desc( - *context_.fwd_desc, cpu_engine_)); + fwd_pd_.reset(new convolution_forward::primitive_desc( + *fwd_desc_, cpu_engine_)); // store the expected memory format - context_.src_fmt = static_cast( - context_.fwd_pd.get()->src_primitive_desc().desc().data.format); + src_fmt_ = static_cast( + fwd_pd_.get()->src_primitive_desc().desc().data.format); - context_.filter_fmt = static_cast( - context_.fwd_pd.get()->weights_primitive_desc().desc().data.format); + filter_fmt_ = static_cast( + fwd_pd_.get()->weights_primitive_desc().desc().data.format); // create memory primitive based on dummy data - context_.src_mem.reset(new memory( - context_.fwd_pd.get()->src_primitive_desc(), DummyData)); - context_.filter_mem.reset(new memory( - context_.fwd_pd.get()->weights_primitive_desc(), DummyData)); - context_.dst_mem.reset(new memory( - 
context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); + src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData)); + filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), + DummyData)); + dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData)); // create convolution primitive and add it to net if (!convFwdDims.bias_dims.empty()) { - context_.bias_mem.reset(new memory({{{convFwdDims.bias_dims}, - MklDnnType(), memory::format::x}, cpu_engine_}, DummyData)); - context_.conv_fwd.reset(new convolution_forward( - *context_.fwd_pd, *context_.src_mem, *context_.filter_mem, - *context_.bias_mem, *context_.dst_mem)); + bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType(), + memory::format::x}, cpu_engine_}, DummyData)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *filter_mem_, *bias_mem_, *dst_mem_)); } else { - context_.conv_fwd.reset(new convolution_forward( - *context_.fwd_pd, *context_.src_mem, - *context_.filter_mem, *context_.dst_mem)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *filter_mem_, *dst_mem_)); } - context_.fwd_primitives.push_back(*context_.conv_fwd); + fwd_primitives_.push_back(*conv_fwd_); return; } + + // MKLDNN memory + std::shared_ptr src_mem_; + std::shared_ptr filter_mem_; + std::shared_ptr bias_mem_; + std::shared_ptr dst_mem_; + + std::shared_ptr fwd_stream_; + std::vector fwd_primitives_; + + // desc & prmitive desc + std::shared_ptr fwd_desc_; + + // memory desc + std::shared_ptr src_md_; + std::shared_ptr filter_md_; + std::shared_ptr bias_md_; + std::shared_ptr dst_md_; + + engine cpu_engine_ = engine(engine::cpu, 0); }; template -class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { +class Conv2DFwdFactory : public DnnOpFactory { public: - static MklConv2DFwdPrimitive* Get(const MklConvFwdParams& convFwdDims) { - MklConv2DFwdPrimitive* conv2d_fwd = nullptr; + static Conv2DFwd* Get(const ConvFwdDimensions& convFwdDims) { 
+ Conv2DFwd* conv2d_fwd = nullptr; // try to find a suitable one in pool - conv2d_fwd = dynamic_cast*> ( - MklConv2DFwdPrimitiveFactory::GetInstance().GetConv2DFwd( - convFwdDims)); + conv2d_fwd = dynamic_cast*> ( + Conv2DFwdFactory::GetInstance().GetConv2DFwd(convFwdDims)); if (conv2d_fwd == nullptr) { - conv2d_fwd = new MklConv2DFwdPrimitive(convFwdDims); - MklConv2DFwdPrimitiveFactory::GetInstance().SetConv2DFwd( + conv2d_fwd = new Conv2DFwd(convFwdDims); + Conv2DFwdFactory::GetInstance().SetConv2DFwd( convFwdDims, conv2d_fwd); } return conv2d_fwd; } private: - MklConv2DFwdPrimitiveFactory() {} - ~MklConv2DFwdPrimitiveFactory() {} + Conv2DFwdFactory() {} + ~Conv2DFwdFactory() {} static const int kDilationH = 0, kDilationW = 1; - static MklConv2DFwdPrimitiveFactory& GetInstance() { - static MklConv2DFwdPrimitiveFactory instance_; + static Conv2DFwdFactory& GetInstance() { + static Conv2DFwdFactory instance_; return instance_; } - static std::string CreateKey(const MklConvFwdParams& convFwdDims) { + static std::string CreateKey(const ConvFwdDimensions& convFwdDims) { std::string prefix = "conv2d_fwd_"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -307,12 +266,12 @@ class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { return key_creator.GetKey(); } - MklPrimitive* GetConv2DFwd(const MklConvFwdParams& convFwdDims) { + DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) { std::string key = CreateKey(convFwdDims); return this->GetOp(key); } - void SetConv2DFwd(const MklConvFwdParams& convFwdDims, MklPrimitive *op) { + void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) { std::string key = CreateKey(convFwdDims); this->SetOp(key, op); } @@ -803,6 +762,7 @@ class MklConv2DOp : public OpKernel { MklDnnData src(&cpu_engine); MklDnnData filter(&cpu_engine); + MklDnnData dst(&cpu_engine); // output memory::dims src_dims, filter_dims, padding_left, padding_right, dilations, strides; @@ -852,6 +812,7 @@ class MklConv2DOp : 
public OpKernel { auto src_md = src_mkl_shape.IsMklTensor() ? src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), tf_fmt); + src.SetUsrMem(src_md, &src_tensor); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). @@ -859,28 +820,29 @@ class MklConv2DOp : public OpKernel { ? filter_mkl_shape.GetMklLayout() : memory::desc(filter_dims, MklDnnType(), memory::format::hwio); + filter.SetUsrMem(filter_md, &filter_tensor); // MKLDNN dilation starts from 0. dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; // get a conv2d fwd from primitive pool - MklConv2DFwdPrimitive *conv2d_fwd = nullptr; + Conv2DFwd *conv2d_fwd = nullptr; if (biasEnabled) { memory::dims bias_dims = {}; conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); - MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims, + ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); + conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); } else { - MklConvFwdParams convFwdDims(src_dims, filter_dims, NONE_DIMS, + ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); + conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); } // allocate output tensors output_tensor and filter_out_tensor std::shared_ptr - conv_fwd_pd = conv2d_fwd->GetPrimitiveDesc(); + conv_fwd_pd = conv2d_fwd->fwd_pd_; AllocateOutputTensor(context, *conv_fwd_pd, dst_dims_mkl_order, tf_fmt, &dst_tensor); Tensor* filter_out_tensor = nullptr; @@ -892,30 +854,20 @@ class MklConv2DOp : public OpKernel { // check whether src/filter need reorder std::vector net; - T *src_data = nullptr; - if (src_md.data.format != conv2d_fwd->GetSrcMemoryFormat()) { - src.SetUsrMem(src_md, &src_tensor); - 
src.CheckReorderToOpMem( - conv_fwd_pd.get()->src_primitive_desc(), &net); - src_data = static_cast(src.GetOpMem().get_data_handle()); - } else { - src_data = static_cast(const_cast( - src_tensor.flat().data())); - } - T *filter_data = nullptr; - if (filter_md.data.format != conv2d_fwd->GetFilterMemoryFormat()) { - filter.SetUsrMem(filter_md, &filter_tensor); - filter.CheckReorderToOpMem( - conv_fwd_pd.get()->weights_primitive_desc(), - filter.GetTensorBuffer(filter_out_tensor), &net); - filter_data = static_cast(filter.GetOpMem().get_data_handle()); - } else { - filter_data = static_cast(const_cast( - filter_tensor.flat().data())); - } - + if (src_md.data.format != conv2d_fwd->src_fmt_) + src.CheckReorderToOpMem( + conv_fwd_pd.get()->src_primitive_desc(), &net); + + if (filter_md.data.format != conv2d_fwd->filter_fmt_) + filter.CheckReorderToOpMem( + conv_fwd_pd.get()->weights_primitive_desc(), + filter.GetTensorBuffer(filter_out_tensor), &net); stream(stream::kind::eager).submit(net).wait(); + T* src_data = static_cast( + src.GetOpMem().get_data_handle()); + T* filter_data = static_cast( + filter.GetOpMem().get_data_handle()); // execute convolution if (biasEnabled) { diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index c4b5e124fb..230b4278ca 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1794,11 +1794,11 @@ class MklDnnData { } }; -/// Base class for operations with reuse of primitives +/// Base class for operations with reuse of DNN primitives /// -class MklPrimitive { +class DnnOp { public: - virtual ~MklPrimitive() {} + virtual ~DnnOp() {} // Dummy data. Its size, hard-coded as 256 here, does // not matter since MKL should never operate on this buffer. 
@@ -1806,33 +1806,33 @@ class MklPrimitive { }; const mkldnn::memory::dims NONE_DIMS = {}; - +// This constant is used to declare dummy buffer (size), for MKL primitives template -class MklPrimitiveFactory { +class DnnOpFactory { public: - MklPrimitiveFactory() {} - ~MklPrimitiveFactory() {} + DnnOpFactory() {} + ~DnnOpFactory() {} - MklPrimitive* GetOp(const std::string& key) { - auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); - if (stream_iter == MklPrimitiveFactory::GetHashMap().end()) { + DnnOp* GetOp(const std::string& key) { + auto stream_iter = DnnOpFactory::GetHashMap().find(key); + if (stream_iter == DnnOpFactory::GetHashMap().end()) { return nullptr; } else { return stream_iter->second; } } - void SetOp(const std::string& key, MklPrimitive* op) { - auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); + void SetOp(const std::string& key, DnnOp* op) { + auto stream_iter = DnnOpFactory::GetHashMap().find(key); - CHECK(stream_iter == MklPrimitiveFactory::GetHashMap().end()); + CHECK(stream_iter == DnnOpFactory::GetHashMap().end()); - MklPrimitiveFactory::GetHashMap()[key] = op; + DnnOpFactory::GetHashMap()[key] = op; } private: - static inline std::unordered_map &GetHashMap() { - static thread_local std::unordered_map map_; + static inline std::unordered_map &GetHashMap() { + static thread_local std::unordered_map map_; return map_; } }; -- GitLab From d03f0ca404d7f092441a868865ad49b0706ae2df Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Mon, 4 Jun 2018 14:23:52 -0700 Subject: [PATCH 0028/2038] remove unused methods: PrepareAndExecuteNet, ConfigureOriginalInput and ConfigureOriginalOutput --- tensorflow/core/kernels/mkl_avgpooling_op.cc | 22 ------ tensorflow/core/kernels/mkl_maxpooling_op.cc | 12 --- .../core/kernels/mkl_pooling_ops_common.h | 78 ------------------- 3 files changed, 112 deletions(-) diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc index 
7cef2778bf..f8c8db1ce5 100644 --- a/tensorflow/core/kernels/mkl_avgpooling_op.cc +++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc @@ -658,30 +658,8 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { // 1. Input("grad: T") const int kInputTensorIndexInputShape = 0; const int kInputTensorIndexInputGradient = 1; - engine cpu_engine_ = engine(engine::cpu, 0); - memory::desc ConfigureOriginalInput(OpKernelContext* context, - const Tensor& tensor_original_input_shape, - const MklDnnShape& original_input_mkl_shape, - memory::dims* original_input_dims_mkl_order, - MklPoolParameters* pool_params, - TensorShape* input_tensor_shape) { - CHECK_NOTNULL(original_input_dims_mkl_order); - CHECK_NOTNULL(pool_params); - CHECK_NOTNULL(input_tensor_shape); - // For AvgPoolGrad, we only get the size of the original input because - // The original data is irrelvant. - auto shape_vec = tensor_original_input_shape.vec(); - for (int64 i = 0; i < tensor_original_input_shape.NumElements(); ++i) { - input_tensor_shape->AddDim(shape_vec(i)); - } - - return MklPoolingBackwardOpBase::ConfigureOriginalInput( - context, tensor_original_input_shape, original_input_mkl_shape, - original_input_dims_mkl_order, pool_params, *input_tensor_shape); - } - void SanityCheckInputs(OpKernelContext* context, const Tensor& tensor_in_shape, const Tensor& input_gradient_tensor, diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc index 1726521539..bba5741292 100644 --- a/tensorflow/core/kernels/mkl_maxpooling_op.cc +++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc @@ -762,18 +762,6 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { const int kInputTensorIndexOrigOutput = 1; const int kInputTensorIndexGradient = 2; const int kInputTensorIndexWorkspace = 3; - // Output("output: T") in Base Class - - memory::desc ConfigureOriginalInput( - OpKernelContext* context, const Tensor& tensor_original_input, - const MklDnnShape& 
original_input_mkl_shape, - memory::dims* original_input_dims_mkl_order, - MklPoolParameters* pool_params, TensorShape* input_tensor_shape) { - *input_tensor_shape = tensor_original_input.shape(); - return MklPoolingBackwardOpBase::ConfigureOriginalInput( - context, tensor_original_input, original_input_mkl_shape, - original_input_dims_mkl_order, pool_params, *input_tensor_shape); - } void ConfigureWorkspace(const Tensor& workspace_tensor, memory::primitive_desc workspace_pd, diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index 468dc41c57..f8d6b64b0f 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -543,23 +543,6 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase { CHECK_NOTNULL(*output_tensor); } - void PrepareAndExecuteNet( - const pooling_forward::primitive_desc& pool_fwd_desc, - const MklDnnData* src, MklDnnData* dst, - MklDnnData* wksp = nullptr) { - std::vector net; - - // Create pooling primitive and add it to net - if (wksp != nullptr) { - net.push_back(pooling_forward(pool_fwd_desc, src->GetOpMem(), - dst->GetOpMem(), wksp->GetOpMem())); - } else { - net.push_back( - pooling_forward(pool_fwd_desc, src->GetOpMem(), dst->GetOpMem())); - } - stream(stream::kind::eager).submit(net).wait(); - } - void SanityCheckInput(OpKernelContext* context, const Tensor& input_tensor, const MklDnnShape& input_mkl_shape) { if (!input_mkl_shape.IsMklTensor()) { @@ -609,67 +592,6 @@ class MklPoolingBackwardOpBase : public MklPoolingOpBase { CHECK_NOTNULL(*output_tensor); } - void PrepareAndExecuteNet( - const pooling_backward::primitive_desc& pool_bkwd_desc, - MklDnnData* input_gradient_diff_dst, MklDnnData* output_diff_src, - const memory::primitive_desc& target_diff_dst_pd, - const MklDnnData* workspace = nullptr) { - std::vector net; - - // If the input gradient isn't in the same format as the output - // reorder it to the same 
format as the output - input_gradient_diff_dst->CheckReorderToOpMem(target_diff_dst_pd, &net); - - // Create pooling primitive and add it to net - if (nullptr == workspace) { - net.push_back(pooling_backward(pool_bkwd_desc, - input_gradient_diff_dst->GetOpMem(), - output_diff_src->GetOpMem())); - } else { - net.push_back( - pooling_backward(pool_bkwd_desc, input_gradient_diff_dst->GetOpMem(), - workspace->GetOpMem(), output_diff_src->GetOpMem())); - } - stream(stream::kind::eager).submit(net).wait(); - } - - // Max Pooling and Avg Pooling have slightly different implementations - // Takes the Tensor containing original input data and the original - // mkl Dnn Shape and populates other data - memory::desc ConfigureOriginalInput( - OpKernelContext* context, const Tensor& tensor_original_input_shape, - const MklDnnShape& original_input_mkl_shape, - memory::dims* original_input_dims_nchw, MklPoolParameters* pool_params, - const TensorShape& input_tensor_shape) { - CHECK_NOTNULL(original_input_dims_nchw); - CHECK_NOTNULL(pool_params); - this->InitMklPoolParameters(context, pool_params, original_input_mkl_shape, - input_tensor_shape); - - *original_input_dims_nchw = - original_input_mkl_shape.IsMklTensor() - ? original_input_mkl_shape.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(input_tensor_shape, - this->data_format_tf_); - - return original_input_mkl_shape.IsMklTensor() - ? original_input_mkl_shape.GetMklLayout() - : memory::desc(*original_input_dims_nchw, MklDnnType(), - this->data_format_mkldnn_); - } - - memory::desc ConfigureOriginalOutput( - const MklPoolParameters& pool_params, - const MklDnnShape& original_output_mkl_shape, - const memory::dims& output_dims_mkl_order) { - this->GetOutputDims(pool_params, &output_dims_mkl_order); - - return original_output_mkl_shape.IsMklTensor() - ? 
original_output_mkl_shape.GetMklLayout() - : memory::desc(output_dims_mkl_order, MklDnnType(), - this->data_format_mkldnn_); - } - memory::desc ConfigureInputGradient( const MklDnnShape& input_gradient_mkl_shape, const Tensor& input_gradient_tensor, -- GitLab From f2e22502fd58e8d81c9e080b9242375fbf2bc772 Mon Sep 17 00:00:00 2001 From: Jesse Date: Tue, 5 Jun 2018 14:35:38 +0200 Subject: [PATCH 0029/2038] Updated line for creating global step + grammar tf.train.get_global_step() returns None if there is no global step, preventing the pruning from working. Therefore, tf.train.get_or_create_global_step() is a safer option. --- tensorflow/contrib/model_pruning/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/model_pruning/README.md b/tensorflow/contrib/model_pruning/README.md index 86f4fd6adf..50e7e5d7cd 100644 --- a/tensorflow/contrib/model_pruning/README.md +++ b/tensorflow/contrib/model_pruning/README.md @@ -66,10 +66,10 @@ is the sparsity_function_begin_step. In this equation, the sparsity_function_exponent is set to 3. ### Adding pruning ops to the training graph -The final step involves adding ops to the training graph that monitors the -distribution of the layer's weight magnitudes and determines the layer threshold -such masking all the weights below this threshold achieves the sparsity level -desired for the current training step. This can be achieved as follows: +The final step involves adding ops to the training graph that monitor the +distribution of the layer's weight magnitudes and determine the layer threshold, +such that masking all the weights below this threshold achieves the sparsity +level desired for the current training step. 
This can be achieved as follows: ```python tf.app.flags.DEFINE_string( @@ -79,7 +79,7 @@ tf.app.flags.DEFINE_string( with tf.graph.as_default(): # Create global step variable - global_step = tf.train.get_global_step() + global_step = tf.train.get_or_create_global_step() # Parse pruning hyperparameters pruning_hparams = pruning.get_pruning_hparams().parse(FLAGS.pruning_hparams) -- GitLab From f9c7fe82cb930ee26d281e4bf21211ed352d176e Mon Sep 17 00:00:00 2001 From: Jesse Date: Tue, 5 Jun 2018 14:49:04 +0200 Subject: [PATCH 0030/2038] Put some emphasis on incrementing global step Pruning will not work if the global step is not incremented --- tensorflow/contrib/model_pruning/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/model_pruning/README.md b/tensorflow/contrib/model_pruning/README.md index 50e7e5d7cd..9143d082bf 100644 --- a/tensorflow/contrib/model_pruning/README.md +++ b/tensorflow/contrib/model_pruning/README.md @@ -103,6 +103,7 @@ with tf.graph.as_default(): mon_sess.run(mask_update_op) ``` +Ensure that `global_step` is being [incremented](https://www.tensorflow.org/api_docs/python/tf/train/Optimizer#minimize), otherwise pruning will not work! 
## Example: Pruning and training deep CNNs on the cifar10 dataset -- GitLab From e106a458dd26db58c7d5abbd4afef60f8ce33252 Mon Sep 17 00:00:00 2001 From: Jesse Date: Tue, 5 Jun 2018 15:22:07 +0200 Subject: [PATCH 0031/2038] Prevent redundant ":0" in summary names Take identical approach as is done with thresholds: using tf.Variable.op.name instead of tf.Variable.name, to prevent TensorFlow saying summary names are illegal (due to ":") --- tensorflow/contrib/model_pruning/python/pruning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py index 4b7af18b33..e6f9acc139 100644 --- a/tensorflow/contrib/model_pruning/python/pruning.py +++ b/tensorflow/contrib/model_pruning/python/pruning.py @@ -520,7 +520,7 @@ class Pruning(object): thresholds = get_thresholds() for index, mask in enumerate(masks): if not self._exists_in_do_not_prune_list(mask.name): - summary.scalar(mask.name + '/sparsity', nn_impl.zero_fraction(mask)) + summary.scalar(mask.op.name + '/sparsity', nn_impl.zero_fraction(mask)) summary.scalar(thresholds[index].op.name + '/threshold', thresholds[index]) -- GitLab From d46c73a8766be9342164f3c01d94879746a7b3c2 Mon Sep 17 00:00:00 2001 From: tucan <37643248+tucan9389@users.noreply.github.com> Date: Wed, 6 Jun 2018 13:26:35 +0900 Subject: [PATCH 0032/2038] Update CONTRIBUTING.md Just update clang-tidy to `clang-tidy`. --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3dad41a88c..aab506f419 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -79,7 +79,7 @@ Bazel BUILD files also need to include a license section, e.g., Changes to TensorFlow C++ code should conform to [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). -Use `clang-tidy` to check your C/C++ changes. 
To install clang-tidy on ubuntu:16.04, do: +Use `clang-tidy` to check your C/C++ changes. To install `clang-tidy` on ubuntu:16.04, do: ```bash apt-get install -y clang-tidy -- GitLab From 90b28b7316edb644b71b01edaaa8553d5913fc19 Mon Sep 17 00:00:00 2001 From: Jesse Date: Wed, 6 Jun 2018 16:07:20 +0200 Subject: [PATCH 0033/2038] Removed redundant use of enumeration Since every mask has an accompanying threshold, zip(masks, thresholds) can be used instead of enumerate(masks) and calling thresholds by index. --- tensorflow/contrib/model_pruning/python/pruning.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py index e6f9acc139..d843fa26d5 100644 --- a/tensorflow/contrib/model_pruning/python/pruning.py +++ b/tensorflow/contrib/model_pruning/python/pruning.py @@ -518,11 +518,10 @@ class Pruning(object): summary.scalar('last_mask_update_step', self._last_update_step) masks = get_masks() thresholds = get_thresholds() - for index, mask in enumerate(masks): + for mask, threshold in zip(masks, thresholds): if not self._exists_in_do_not_prune_list(mask.name): summary.scalar(mask.op.name + '/sparsity', nn_impl.zero_fraction(mask)) - summary.scalar(thresholds[index].op.name + '/threshold', - thresholds[index]) + summary.scalar(threshold.op.name + '/threshold', threshold) def print_hparams(self): logging.info(self._spec.to_json()) -- GitLab From 02b7fa3dfe3e82ca61581bf3365788c8acaa2b19 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 6 Jun 2018 14:04:40 -0700 Subject: [PATCH 0034/2038] Adding a constraint for the setuptools version. 
--- tensorflow/tools/pip_package/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 78d955c637..97f625e7e9 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -54,6 +54,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', + 'setuptools <= 39.1.0', 'tensorboard >= 1.8.0, < 1.9.0', 'termcolor >= 1.1.0', ] -- GitLab From da3f4f86267a42f1a7780222143d79b167a75eb1 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 6 Jun 2018 14:27:59 -0700 Subject: [PATCH 0035/2038] Removing the force downgrade install. --- tensorflow/tools/ci_build/builds/pip.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index 883bb93647..5fa75e1d61 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -322,10 +322,6 @@ create_activate_virtualenv_and_install_tensorflow() { pip install -v ${PIP_FLAGS} ${WHL_PATH} || \ die "pip install (forcing to reinstall tensorflow) FAILED" echo "Successfully installed pip package ${TF_WHEEL_PATH}" - - # Force downgrade setuptools. - pip install --upgrade setuptools==39.1.0 - } ################################################################################ -- GitLab From 60cb7f88afda606df2b700ce0bb662f22e1a7709 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 7 Jun 2018 12:53:11 -0700 Subject: [PATCH 0036/2038] Consolidate `tf.data` release notes. --- RELEASE.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index c1ed69bd45..8f76e7efb4 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -14,8 +14,13 @@ ## Bug Fixes and Other Changes * `tf.data`: - * The `DatasetBase::DebugString()` method is now `const`. 
- * Added the `tf.contrib.data.sample_from_datasets()` API for randomly sampling from multiple datasets. + * `Dataset.from_generator()` now accepts an `args` list, in order to create nested generators. + * `Dataset.list_files()` now produces determinstic results when `shuffle=False` or a `seed` is passed. + * `tf.contrib.data.sample_from_datasets()` and `tf.contrib.data.choose_from_datasets()` make it easier to sample or deterministically choose elements from multiple datasets. + * `tf.contrib.data.make_csv_dataset()` now supports line breaks in quoted strings, and two infrequently used arguments removed. + * (C++) `DatasetBase::DebugString()` is now `const`. + * (C++) `DatasetBase::MakeIterator()` has been renamed to `DatasetBase::MakeIteratorInternal()`. + * (C++) `IteratorBase::Initialize()` method was added to support raising errors during iterator construction. * Eager Execution: * `tf.keras`: * Move Keras code out of _impl folder and remove API files. @@ -24,8 +29,6 @@ * Accelerated Linear Algebra (XLA): * TensorFlow Debugger (tfdbg) CLI: * `tf.contrib`: - * Add `tf.contrib.data.choose_from_datasets()`. - * `tf.contrib.data.make_csv_dataset()` now supports line breaks in quoted strings. Two arguments were removed from `make_csv_dataset`. * `tf.contrib.framework.zero_initializer` supports ResourceVariable. * Adding "constrained_optimization" to tensorflow/contrib. * Other: @@ -35,7 +38,6 @@ * More consistent GcsFileSystem behavior for certain reads past EOF. * Update benchmark for tf.scan to match ranges across eager and graph modes. * Fixed bug in `tf.reduce_prod gradient` for complex dtypes. - * Add optional `args` argument to `Dataset.from_generator()`. * Allow the use of '.' in variables (e.g. "hparams.parse('a.b=1.0')"), which would previously raise an error. This will correspond to an attribute name with an embedded '.' symbol (e.g. 'a.b'), which can only be accessed indirectly (e.g. through getattr and setattr). 
To set this up the user will first need to explicitly add the variable to the hparam object (e.g. "hparams.add_hparam(name='a.b', value=0.0)"). * Benchmark for tf.scan in graph and eager modes. * Added complex128 support to FFT, FFT2D, FFT3D, IFFT, IFFT2D, and IFFT3D. @@ -45,7 +47,6 @@ * LinearOperator[1D,2D,3D]Circulant added to `tensorflow.linalg`. * Conv3D, Conv3DBackpropInput, Conv3DBackpropFilter now supports arbitrary. * Added `tf.train.Checkpoint` for reading/writing object-based checkpoints. - * `Dataset.list_files()` now produces determinstic results when `shuffle=False` or a `seed` is passed. * Added LinearOperatorKronecker, a dense-free implementation of the Kronecker Product. * Allow LinearOperator to broadcast. * SavedModelBuilder will now deduplicate asset names that point to files with the same basename and the same contents. Note that this may result in new asset files included in SavedModels in cases where assets with the same name but different contents were previously overwriting each other. -- GitLab From d3b482dadfa1b59ec04ee668ebd899e6bcb4b7b8 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 8 Jun 2018 14:55:26 -0400 Subject: [PATCH 0037/2038] Update RELEASE.md (r1.9) for tfdbg and XLA --- RELEASE.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 8f76e7efb4..879ce6e440 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -26,8 +26,7 @@ * Move Keras code out of _impl folder and remove API files. * `tf.keras.Model.save_weights` now saves in TensorFlow format by default. * Enable dataset iterators to be passed to `tf.keras.Model` training/eval methods. -* Accelerated Linear Algebra (XLA): -* TensorFlow Debugger (tfdbg) CLI: +* TensorFlow Debugger (tfdbg) CLI: fix an issue in which the TensorBoard Debugger Plugin could not handle total source file size exceeding gRPC message size limit (4 MB). * `tf.contrib`: * `tf.contrib.framework.zero_initializer` supports ResourceVariable. 
* Adding "constrained_optimization" to tensorflow/contrib. -- GitLab From a08c8a79f3d0ea5a7fac74d8f5e9da5def89170b Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 4 Jun 2018 11:11:06 -0700 Subject: [PATCH 0038/2038] Fix visibility for tf.keras.__version__ PiperOrigin-RevId: 199161696 --- tensorflow/python/keras/__init__.py | 4 ++++ tensorflow/python/keras/integration_test.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/python/keras/__init__.py b/tensorflow/python/keras/__init__.py index 197f306097..3493069a5b 100644 --- a/tensorflow/python/keras/__init__.py +++ b/tensorflow/python/keras/__init__.py @@ -41,8 +41,12 @@ from tensorflow.python.keras.layers import Input from tensorflow.python.keras.models import Model from tensorflow.python.keras.models import Sequential +from tensorflow.python.util.tf_export import tf_export + __version__ = '2.1.6-tf' +tf_export('keras.__version__').export_constant(__name__, '__version__') + del absolute_import del division del print_function diff --git a/tensorflow/python/keras/integration_test.py b/tensorflow/python/keras/integration_test.py index 2e83544d97..2a05699407 100644 --- a/tensorflow/python/keras/integration_test.py +++ b/tensorflow/python/keras/integration_test.py @@ -29,6 +29,9 @@ from tensorflow.python.platform import test class KerasIntegrationTest(test.TestCase): + def test_version(self): + self.assertTrue(keras.__version__.endswith('-tf')) + def test_vector_classification_sequential(self): with self.test_session(): np.random.seed(1337) -- GitLab From 0eac1ebafc1e16e6440658d6b431998f3e682bbc Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 4 Jun 2018 14:46:38 -0700 Subject: [PATCH 0039/2038] Add various missing aliases for symbols in tf.keras submodules. 
PiperOrigin-RevId: 199198086 --- tensorflow/python/keras/losses.py | 35 ++++++++++++--- tensorflow/python/ops/init_ops.py | 21 +++++---- ...nsorflow.keras.initializers.constant.pbtxt | 18 ++++++++ ...nsorflow.keras.initializers.identity.pbtxt | 18 ++++++++ ...tensorflow.keras.initializers.normal.pbtxt | 18 ++++++++ .../tensorflow.keras.initializers.ones.pbtxt | 18 ++++++++ ...orflow.keras.initializers.orthogonal.pbtxt | 18 ++++++++ .../tensorflow.keras.initializers.pbtxt | 40 +++++++++++++++++ ...low.keras.initializers.random_normal.pbtxt | 18 ++++++++ ...ow.keras.initializers.random_uniform.pbtxt | 18 ++++++++ ....keras.initializers.truncated_normal.pbtxt | 18 ++++++++ ...ensorflow.keras.initializers.uniform.pbtxt | 18 ++++++++ .../tensorflow.keras.initializers.zeros.pbtxt | 18 ++++++++ .../api/golden/tensorflow.keras.losses.pbtxt | 44 +++++++++++++++++++ .../api/golden/tensorflow.keras.metrics.pbtxt | 44 +++++++++++++++++++ 15 files changed, 350 insertions(+), 14 deletions(-) create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.constant.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.identity.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.normal.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.ones.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.orthogonal.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.random_normal.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.random_uniform.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.truncated_normal.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.uniform.pbtxt create mode 100644 tensorflow/tools/api/golden/tensorflow.keras.initializers.zeros.pbtxt diff --git a/tensorflow/python/keras/losses.py 
b/tensorflow/python/keras/losses.py index d82ebd9c31..9f548bfe04 100644 --- a/tensorflow/python/keras/losses.py +++ b/tensorflow/python/keras/losses.py @@ -30,19 +30,31 @@ from tensorflow.python.util.tf_export import tf_export @tf_export('keras.metrics.mean_squared_error', - 'keras.losses.mean_squared_error') + 'keras.metrics.mse', + 'keras.metrics.MSE', + 'keras.losses.mean_squared_error', + 'keras.losses.mse', + 'keras.losses.MSE') def mean_squared_error(y_true, y_pred): return K.mean(math_ops.square(y_pred - y_true), axis=-1) @tf_export('keras.metrics.mean_absolute_error', - 'keras.losses.mean_absolute_error') + 'keras.metrics.mae', + 'keras.metrics.MAE', + 'keras.losses.mean_absolute_error', + 'keras.losses.mae', + 'keras.losses.MAE') def mean_absolute_error(y_true, y_pred): return K.mean(math_ops.abs(y_pred - y_true), axis=-1) @tf_export('keras.metrics.mean_absolute_percentage_error', - 'keras.losses.mean_absolute_percentage_error') + 'keras.metrics.mape', + 'keras.metrics.MAPE', + 'keras.losses.mean_absolute_percentage_error', + 'keras.losses.mape', + 'keras.losses.MAPE') def mean_absolute_percentage_error(y_true, y_pred): diff = math_ops.abs( (y_true - y_pred) / K.clip(math_ops.abs(y_true), K.epsilon(), None)) @@ -50,7 +62,11 @@ def mean_absolute_percentage_error(y_true, y_pred): @tf_export('keras.metrics.mean_squared_logarithmic_error', - 'keras.losses.mean_squared_logarithmic_error') + 'keras.metrics.msle', + 'keras.metrics.MSLE', + 'keras.losses.mean_squared_logarithmic_error', + 'keras.losses.msle', + 'keras.losses.MSLE') def mean_squared_logarithmic_error(y_true, y_pred): first_log = math_ops.log(K.clip(y_pred, K.epsilon(), None) + 1.) second_log = math_ops.log(K.clip(y_true, K.epsilon(), None) + 1.) 
@@ -117,7 +133,11 @@ def binary_crossentropy(y_true, y_pred): @tf_export('keras.metrics.kullback_leibler_divergence', - 'keras.losses.kullback_leibler_divergence') + 'keras.metrics.kld', + 'keras.metrics.KLD', + 'keras.losses.kullback_leibler_divergence', + 'keras.losses.kld', + 'keras.losses.KLD') def kullback_leibler_divergence(y_true, y_pred): y_true = K.clip(y_true, K.epsilon(), 1) y_pred = K.clip(y_pred, K.epsilon(), 1) @@ -129,7 +149,10 @@ def poisson(y_true, y_pred): return K.mean(y_pred - y_true * math_ops.log(y_pred + K.epsilon()), axis=-1) -@tf_export('keras.metrics.cosine_proximity', 'keras.losses.cosine_proximity') +@tf_export('keras.metrics.cosine_proximity', + 'keras.metrics.cosine', + 'keras.losses.cosine_proximity', + 'keras.losses.cosine') def cosine_proximity(y_true, y_pred): y_true = nn.l2_normalize(y_true, axis=-1) y_pred = nn.l2_normalize(y_pred, axis=-1) diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index 1f8d8dc4f3..2df230d470 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -86,7 +86,7 @@ class Initializer(object): @tf_export("keras.initializers.Zeros", "initializers.zeros", - "zeros_initializer") + "zeros_initializer", "keras.initializers.zeros") class Zeros(Initializer): """Initializer that generates tensors initialized to 0.""" @@ -102,7 +102,8 @@ class Zeros(Initializer): return {"dtype": self.dtype.name} -@tf_export("keras.initializers.Ones", "initializers.ones", "ones_initializer") +@tf_export("keras.initializers.Ones", "initializers.ones", "ones_initializer", + "keras.initializers.ones") class Ones(Initializer): """Initializer that generates tensors initialized to 1.""" @@ -119,7 +120,7 @@ class Ones(Initializer): @tf_export("keras.initializers.Constant", "initializers.constant", - "constant_initializer") + "constant_initializer", "keras.initializers.constant") class Constant(Initializer): """Initializer that generates tensors with constant values. 
@@ -225,7 +226,8 @@ class Constant(Initializer): @tf_export("keras.initializers.RandomUniform", "initializers.random_uniform", - "random_uniform_initializer") + "random_uniform_initializer", "keras.initializers.uniform", + "keras.initializers.random_uniform") class RandomUniform(Initializer): """Initializer that generates tensors with a uniform distribution. @@ -262,7 +264,8 @@ class RandomUniform(Initializer): @tf_export("keras.initializers.RandomNormal", "initializers.random_normal", - "random_normal_initializer") + "random_normal_initializer", "keras.initializers.normal", + "keras.initializers.random_normal") class RandomNormal(Initializer): """Initializer that generates tensors with a normal distribution. @@ -299,7 +302,8 @@ class RandomNormal(Initializer): @tf_export("keras.initializers.TruncatedNormal", - "initializers.truncated_normal", "truncated_normal_initializer") + "initializers.truncated_normal", "truncated_normal_initializer", + "keras.initializers.truncated_normal") class TruncatedNormal(Initializer): """Initializer that generates a truncated normal distribution. @@ -482,7 +486,7 @@ class VarianceScaling(Initializer): @tf_export("keras.initializers.Orthogonal", "initializers.orthogonal", - "orthogonal_initializer") + "orthogonal_initializer", "keras.initializers.orthogonal") class Orthogonal(Initializer): """Initializer that generates an orthogonal matrix. @@ -1062,7 +1066,8 @@ class ConvolutionOrthogonal3D(ConvolutionOrthogonal): return self._dict_to_tensor(p, ksize, ksize, ksize) -@tf_export("keras.initializers.Identity", "initializers.identity") +@tf_export("keras.initializers.Identity", "initializers.identity", + "keras.initializers.identity") class Identity(Initializer): """Initializer that generates the identity matrix. 
diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.constant.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.constant.pbtxt new file mode 100644 index 0000000000..bddc37b907 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.constant.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.constant" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'value\', \'dtype\', \'verify_shape\'], varargs=None, keywords=None, defaults=[\'0\', \"\", \'False\'], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.identity.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.identity.pbtxt new file mode 100644 index 0000000000..a4c5a61490 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.identity.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.identity" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'gain\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.normal.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.normal.pbtxt new file mode 100644 index 0000000000..7485772784 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.normal.pbtxt @@ -0,0 +1,18 
@@ +path: "tensorflow.keras.initializers.normal" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'mean\', \'stddev\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \'None\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.ones.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.ones.pbtxt new file mode 100644 index 0000000000..a89f78d1e1 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.ones.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.ones" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'dtype\'], varargs=None, keywords=None, defaults=[\"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.orthogonal.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.orthogonal.pbtxt new file mode 100644 index 0000000000..ee1e9bbae2 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.orthogonal.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.orthogonal" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'gain\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'None\', \"\"], " + } + member_method { + name: "from_config" + 
argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.pbtxt index 093c56595b..14a667870d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.initializers.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.pbtxt @@ -40,6 +40,46 @@ tf_module { name: "Zeros" mtype: "" } + member { + name: "constant" + mtype: "" + } + member { + name: "identity" + mtype: "" + } + member { + name: "normal" + mtype: "" + } + member { + name: "ones" + mtype: "" + } + member { + name: "orthogonal" + mtype: "" + } + member { + name: "random_normal" + mtype: "" + } + member { + name: "random_uniform" + mtype: "" + } + member { + name: "truncated_normal" + mtype: "" + } + member { + name: "uniform" + mtype: "" + } + member { + name: "zeros" + mtype: "" + } member_method { name: "deserialize" argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_normal.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_normal.pbtxt new file mode 100644 index 0000000000..a6df1e87a3 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_normal.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.random_normal" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'mean\', \'stddev\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \'None\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" 
+ argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_uniform.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_uniform.pbtxt new file mode 100644 index 0000000000..37a0fa0d55 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.random_uniform.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.random_uniform" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'minval\', \'maxval\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \'None\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.truncated_normal.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.truncated_normal.pbtxt new file mode 100644 index 0000000000..f97e93f0b7 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.truncated_normal.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.truncated_normal" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'mean\', \'stddev\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0.0\', \'1.0\', \'None\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.uniform.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.initializers.uniform.pbtxt new file mode 100644 index 0000000000..58186b1383 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.uniform.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.uniform" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'minval\', \'maxval\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'0\', \'None\', \'None\', \"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.zeros.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.zeros.pbtxt new file mode 100644 index 0000000000..a262390687 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.zeros.pbtxt @@ -0,0 +1,18 @@ +path: "tensorflow.keras.initializers.zeros" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member_method { + name: "__init__" + argspec: "args=[\'self\', \'dtype\'], varargs=None, keywords=None, defaults=[\"\"], " + } + member_method { + name: "from_config" + argspec: "args=[\'cls\', \'config\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "get_config" + argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.keras.losses.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.losses.pbtxt index ae5f6305b7..eca6b91538 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.losses.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.losses.pbtxt @@ -1,5 +1,25 @@ path: "tensorflow.keras.losses" tf_module { + member_method { + name: "KLD" + argspec: "args=[\'y_true\', 
\'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MAE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MAPE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MSE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MSLE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "binary_crossentropy" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -12,6 +32,10 @@ tf_module { name: "categorical_hinge" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "cosine" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "cosine_proximity" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -28,6 +52,10 @@ tf_module { name: "hinge" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "kld" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "kullback_leibler_divergence" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -36,6 +64,14 @@ tf_module { name: "logcosh" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "mae" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "mape" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "mean_absolute_error" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -52,6 +88,14 
@@ tf_module { name: "mean_squared_logarithmic_error" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "mse" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "msle" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "poisson" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt index 42729e4237..a97a9b5758 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt @@ -1,5 +1,25 @@ path: "tensorflow.keras.metrics" tf_module { + member_method { + name: "KLD" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MAE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MAPE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MSE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "MSLE" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "binary_accuracy" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -16,6 +36,10 @@ tf_module { name: "categorical_crossentropy" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "cosine" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "cosine_proximity" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, 
defaults=None" @@ -32,10 +56,22 @@ tf_module { name: "hinge" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "kld" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "kullback_leibler_divergence" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "mae" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "mape" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "mean_absolute_error" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" @@ -52,6 +88,14 @@ tf_module { name: "mean_squared_logarithmic_error" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "mse" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "msle" + argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "poisson" argspec: "args=[\'y_true\', \'y_pred\'], varargs=None, keywords=None, defaults=None" -- GitLab From 7c33a7751d77cfd70a5c441da369440f4f6b633a Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Thu, 7 Jun 2018 09:20:57 -0700 Subject: [PATCH 0040/2038] Fix bug due to incorrect nesting of return statement in eager iterator evaluation. 
PiperOrigin-RevId: 199645638 --- .../python/keras/engine/training_eager.py | 10 ++-- .../keras/engine/training_eager_test.py | 54 +++++++++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/engine/training_eager.py b/tensorflow/python/keras/engine/training_eager.py index 081e46aa66..a70b488f25 100644 --- a/tensorflow/python/keras/engine/training_eager.py +++ b/tensorflow/python/keras/engine/training_eager.py @@ -501,11 +501,11 @@ def iterator_test_loop(model, inputs, steps, verbose=0): if verbose == 1: progbar.update(step_index + 1) - for i in range(len(outs)): - outs[i] /= num_samples - if len(outs) == 1: - return outs[0] - return outs + for i in range(len(outs)): + outs[i] /= num_samples + if len(outs) == 1: + return outs[0] + return outs def batch_test_loop(model, diff --git a/tensorflow/python/keras/engine/training_eager_test.py b/tensorflow/python/keras/engine/training_eager_test.py index d9446fd437..7906d208eb 100644 --- a/tensorflow/python/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/engine/training_eager_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import numpy as np +from tensorflow.python.data.ops import dataset_ops from tensorflow.python import keras from tensorflow.python.framework import ops from tensorflow.python.framework import test_util as tf_test_util @@ -670,6 +671,59 @@ class CorrectnessTest(test.TestCase): outs = model.evaluate(x, y) self.assertEqual(outs[1], 0.) 
+ @tf_test_util.run_in_graph_and_eager_modes() + def test_loss_correctness_with_iterator(self): + # Test that training loss is the same in eager and graph + # (by comparing it to a reference value in a deterministic case) + model = keras.Sequential() + model.add( + keras.layers.Dense( + 3, activation='relu', input_dim=4, kernel_initializer='ones')) + model.add( + keras.layers.Dense(2, activation='softmax', kernel_initializer='ones')) + model.compile( + loss='sparse_categorical_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001)) + x = np.ones((100, 4), dtype=np.float32) + np.random.seed(123) + y = np.random.randint(0, 1, size=(100, 1)) + dataset = dataset_ops.Dataset.from_tensor_slices((x, y)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + iterator = dataset.make_one_shot_iterator() + history = model.fit(iterator, epochs=1, steps_per_epoch=10) + self.assertEqual(np.around(history.history['loss'][-1], decimals=4), 0.6173) + + @tf_test_util.run_in_graph_and_eager_modes() + def test_metrics_correctness_with_iterator(self): + model = keras.Sequential() + model.add( + keras.layers.Dense( + 8, activation='relu', input_dim=4, kernel_initializer='ones')) + model.add( + keras.layers.Dense(1, activation='sigmoid', kernel_initializer='ones')) + model.compile( + loss='binary_crossentropy', + metrics=['accuracy'], + optimizer=RMSPropOptimizer(learning_rate=0.001)) + np.random.seed(123) + x = np.random.randint(10, size=(100, 4)).astype(np.float32) + y = np.random.randint(2, size=(100, 1)).astype(np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices((x, y)) + dataset = dataset.batch(10) + iterator = dataset.make_one_shot_iterator() + outs = model.evaluate(iterator, steps=10) + self.assertEqual(np.around(outs[1], decimals=1), 0.5) + + y = np.zeros((100, 1), dtype=np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices((x, y)) + dataset = dataset.repeat(100) + dataset = dataset.batch(10) + iterator = dataset.make_one_shot_iterator() + 
outs = model.evaluate(iterator, steps=10) + self.assertEqual(outs[1], 0.) + + if __name__ == '__main__': ops.enable_eager_execution() test.main() -- GitLab From 5177fd2f9acb9b46b9182ad782bb8b7b9386baeb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 5 Jun 2018 15:59:21 -0700 Subject: [PATCH 0041/2038] Only calls compare function if values were read from event file PiperOrigin-RevId: 199373169 --- tensorflow/python/estimator/exporter.py | 7 ++-- tensorflow/python/estimator/exporter_test.py | 34 ++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/estimator/exporter.py b/tensorflow/python/estimator/exporter.py index a7212bb83e..766ea23f2a 100644 --- a/tensorflow/python/estimator/exporter.py +++ b/tensorflow/python/estimator/exporter.py @@ -360,9 +360,10 @@ class BestExporter(Exporter): for value in event.summary.value: if value.HasField('simple_value'): event_eval_result[value.tag] = value.simple_value - if best_eval_result is None or self._compare_fn( - best_eval_result, event_eval_result): - best_eval_result = event_eval_result + if event_eval_result: + if best_eval_result is None or self._compare_fn( + best_eval_result, event_eval_result): + best_eval_result = event_eval_result return best_eval_result diff --git a/tensorflow/python/estimator/exporter_test.py b/tensorflow/python/estimator/exporter_test.py index 4cb4bffc8d..c4b006955c 100644 --- a/tensorflow/python/estimator/exporter_test.py +++ b/tensorflow/python/estimator/exporter_test.py @@ -148,6 +148,40 @@ class BestExporterTest(test.TestCase): "checkpoint_path", {"loss": 20}, False) self.assertEqual(None, export_result) + def test_best_exporter_with_empty_event(self): + + def _serving_input_receiver_fn(): + pass + + export_dir_base = tempfile.mkdtemp() + gfile.MkDir(export_dir_base) + gfile.MkDir(export_dir_base + "/export") + gfile.MkDir(export_dir_base + "/eval") + + eval_dir_base = os.path.join(export_dir_base, "eval_continuous") + 
estimator_lib._write_dict_to_summary(eval_dir_base, {}, 1) + estimator_lib._write_dict_to_summary(eval_dir_base, {"loss": 60}, 2) + + exporter = exporter_lib.BestExporter( + name="best_exporter", + serving_input_receiver_fn=_serving_input_receiver_fn, + event_file_pattern="eval_continuous/*.tfevents.*", + assets_extra={"from/path": "to/path"}, + as_text=False, + exports_to_keep=1) + + estimator = test.mock.Mock(spec=estimator_lib.Estimator) + estimator.model_dir = export_dir_base + estimator.export_savedmodel.return_value = "export_result_path" + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"loss": 100}, False) + self.assertEqual(None, export_result) + + export_result = exporter.export(estimator, export_dir_base, + "checkpoint_path", {"loss": 10}, False) + self.assertEqual("export_result_path", export_result) + def test_garbage_collect_exports(self): export_dir_base = tempfile.mkdtemp() gfile.MkDir(export_dir_base) -- GitLab From e7450fd344623b550eda4dc3d43e936d6a8af936 Mon Sep 17 00:00:00 2001 From: Madiyar Date: Sun, 10 Jun 2018 16:05:40 +0100 Subject: [PATCH 0042/2038] Fix code typo in eager programmers guide --- tensorflow/docs_src/programmers_guide/eager.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/eager.md b/tensorflow/docs_src/programmers_guide/eager.md index b2bc3273b4..babdb1db09 100644 --- a/tensorflow/docs_src/programmers_guide/eager.md +++ b/tensorflow/docs_src/programmers_guide/eager.md @@ -193,8 +193,7 @@ class MNISTModel(tf.keras.Model): def call(self, input): """Run the model.""" result = self.dense1(input) - result = self.dense2(result) - result = self.dense2(result) # reuse variables from dense2 layer + result = self.dense2(result) # reuse variables from dense1 layer return result model = MNISTModel() -- GitLab From 19b77a282b1ade7788ae394f22ac0bd7b0a2ce76 Mon Sep 17 00:00:00 2001 From: nrstott Date: Sun, 10 Jun 2018 20:35:14 -0400 Subject: 
[PATCH 0043/2038] document target_column when Y is dataframe --- tensorflow/python/estimator/inputs/pandas_io.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index 6918683ce7..91eb98cbf4 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -76,7 +76,8 @@ def pandas_input_fn(x, num_threads: Integer, number of threads used for reading and enqueueing. In order to have predicted and repeatable order of reading and enqueueing, such as in prediction and evaluation mode, `num_threads` should be 1. - target_column: str, name to give the target column `y`. + target_column: str, name to give the target column `y`. This parameter + is not used when `y` is a `DataFrame`. Returns: Function, that has signature of ()->(dict of `features`, `target`) -- GitLab From 932fcbbd3836022a862d2479d716fc9c7563ff47 Mon Sep 17 00:00:00 2001 From: nrstott Date: Mon, 11 Jun 2018 10:11:35 -0400 Subject: [PATCH 0044/2038] check that target_column is correct type --- tensorflow/python/estimator/inputs/pandas_io.py | 3 +++ .../python/estimator/inputs/pandas_io_test.py | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index 91eb98cbf4..708d65ff68 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -95,6 +95,9 @@ def pandas_input_fn(x, raise TypeError('shuffle must be explicitly set as boolean; ' 'got {}'.format(shuffle)) + if isinstance(target_column, list): + raise TypeError('target_column must be a string or None') + x = x.copy() if y is not None: if target_column in x: diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py index f8546abb8a..85ba29fd86 100644 --- 
a/tensorflow/python/estimator/inputs/pandas_io_test.py +++ b/tensorflow/python/estimator/inputs/pandas_io_test.py @@ -75,6 +75,19 @@ class PandasIoTest(test.TestCase): pandas_io.pandas_input_fn( x, y_noindex, batch_size=2, shuffle=False, num_epochs=1) + def testPandasInputFn_RaisesWhenTargetColumnIsAList(self): + if not HAS_PANDAS: + return + + x, y = self.makeTestDataFrame() + + with self.assertRaisesRegexp(TypeError, + 'target_column must be a string or None'): + pandas_io.pandas_input_fn(x, y, batch_size=2, + shuffle=False, + num_epochs=1, + target_column=['one', 'two']) + def testPandasInputFn_NonBoolShuffle(self): if not HAS_PANDAS: return -- GitLab From 5e81395f7e427615294d99b90641ef4319a0e7ad Mon Sep 17 00:00:00 2001 From: nrstott Date: Mon, 11 Jun 2018 10:20:41 -0400 Subject: [PATCH 0045/2038] use uuid to generate unique target_column name --- tensorflow/python/estimator/inputs/pandas_io.py | 5 +++-- .../python/estimator/inputs/pandas_io_test.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index 708d65ff68..dc46110e87 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import uuid import numpy as np from tensorflow.python.estimator.inputs.queues import feeding_functions @@ -46,8 +47,8 @@ def _get_unique_target_key(features, target_column_name): A unique key that can be used to insert the target into features. 
""" - while target_column_name in features: - target_column_name += '_n' + if target_column_name in features: + target_column_name += '_' + str(uuid.uuid4()) return target_column_name diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py index 85ba29fd86..fa75e1ba4a 100644 --- a/tensorflow/python/estimator/inputs/pandas_io_test.py +++ b/tensorflow/python/estimator/inputs/pandas_io_test.py @@ -143,6 +143,22 @@ class PandasIoTest(test.TestCase): self.assertAllEqual(targets['a'], [10, 11]) self.assertAllEqual(targets['b'], [50, 51]) + def testPandasInputFnWhenYIsDataFrame_HandlesOverlappingColumnNamesInTargets(self): + if not HAS_PANDAS: + return + with self.test_session() as session: + x, y = self.makeTestDataFrameWithYAsDataFrame() + y = y.rename(columns={'a_target': 'a', 'b_target': 'a_n'}) + input_fn = pandas_io.pandas_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + + features, targets = self.callInputFnOnce(input_fn, session) + + self.assertAllEqual(features['a'], [0, 1]) + self.assertAllEqual(features['b'], [32, 33]) + self.assertAllEqual(targets['a'], [10, 11]) + self.assertAllEqual(targets['a_n'], [50, 51]) + def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self): if not HAS_PANDAS: return -- GitLab From 5fc6ebb1db3b7714f5449737438ac5007abbb410 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 11 Jun 2018 15:10:41 -0700 Subject: [PATCH 0046/2038] Try importing TRTOps to import_pb_to_tensorboard script to convert TF-TRT optimized graphs to tensorboard graphs --- tensorflow/python/tools/import_pb_to_tensorboard.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/python/tools/import_pb_to_tensorboard.py b/tensorflow/python/tools/import_pb_to_tensorboard.py index 00de044505..96f47c85da 100644 --- a/tensorflow/python/tools/import_pb_to_tensorboard.py +++ b/tensorflow/python/tools/import_pb_to_tensorboard.py @@ -29,6 +29,13 @@ from 
tensorflow.python.platform import app from tensorflow.python.platform import gfile from tensorflow.python.summary import summary +# Try importing TensorRT ops if available +# pylint: disable=unused-import,trailing-whitespace,g-import-not-at-top,wildcard-import +try: + from tensorflow.contrib.tensorrt.ops.gen_trt_engine_op import * +except ImportError: + pass +# pylint: enable=unused-import,trailing-whitespace,g-import-not-at-top,wildcard-import def import_to_tensorboard(model_dir, log_dir): """View an imported protobuf model (`.pb` file) as a graph in Tensorboard. -- GitLab From 4fe8d4a14936dc38558a858283574993909c9895 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 27 May 2018 10:49:12 -0700 Subject: [PATCH 0047/2038] TPUEstimator.export_savedmodel() saves a SavedModel with both TPU and CPU graphs. PiperOrigin-RevId: 198229550 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 4465833f88..c8c08a5a63 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1807,7 +1807,7 @@ class TPUEstimator(estimator_lib.Estimator): export_outputs['classes'] = export_output_lib.ClassificationOutput(classes=classes) - tpu.outside_compilation(host_call, logits) + tpu.outside_compilation(host_call, [logits]) ... ``` @@ -1969,7 +1969,7 @@ class TPUEstimator(estimator_lib.Estimator): input_receiver_fn_map[mode]} export_tags = [tag_constants.SERVING, tag_constants.TPU] mode = _REWRITE_FOR_INFERENCE_MODE - try: + if self._export_to_tpu: (super(TPUEstimator, self). 
_add_meta_graph_for_mode(builder, input_receiver_fn_map, @@ -1978,9 +1978,6 @@ class TPUEstimator(estimator_lib.Estimator): save_variables=False, mode=mode, export_tags=export_tags)) - except Exception as error: # pylint: disable=broad-except - logging.warning('Saving meta graph for TPU failed: {}.' - .format(str(error))) def _call_model_fn(self, features, labels, mode, config): if mode == _REWRITE_FOR_INFERENCE_MODE: -- GitLab From 51ad43efe5d918e7c57bd6c612fc1d0efd0b0664 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 29 May 2018 14:28:59 -0700 Subject: [PATCH 0048/2038] In TPUEstimator.export_savedmodel(), if saving TPU metagraph fails, issue a warning instead so that user can still use the CPU metagraph. PiperOrigin-RevId: 198458571 -- GitLab From 982f3e3038f8d07964b2c58843a51bd9745a8990 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 1 Jun 2018 16:32:20 -0700 Subject: [PATCH 0049/2038] Allow user to opt out of saving metagraph for TPU with TPUEstimator.export_output(). PiperOrigin-RevId: 198944144 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index c8c08a5a63..7c770912b4 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1830,6 +1830,7 @@ class TPUEstimator(estimator_lib.Estimator): predict_batch_size=None, batch_axis=None, eval_on_tpu=True, + export_to_tpu=True, warm_start_from=None): """Constructs an `TPUEstimator` instance. @@ -1872,6 +1873,8 @@ class TPUEstimator(estimator_lib.Estimator): False or `PER_HOST_V2`, batch_axis is ignored. eval_on_tpu: If False, evaluation runs on CPU or GPU. In this case, the model_fn must return `EstimatorSpec` when called with `mode` as `EVAL`. 
+ export_to_tpu: If True, `export_savedmodel()` exports a metagraph for + serving on TPU besides the one on CPU. warm_start_from: Optional string filepath to a checkpoint or SavedModel to warm-start from, or a `tf.estimator.WarmStartSettings` object to fully configure warm-starting. If the string @@ -1943,6 +1946,8 @@ class TPUEstimator(estimator_lib.Estimator): use_tpu, eval_on_tpu) + self._export_to_tpu = export_to_tpu + self._is_input_fn_invoked = None def _add_meta_graph_for_mode(self, @@ -1965,11 +1970,11 @@ class TPUEstimator(estimator_lib.Estimator): save_variables, mode=mode) - input_receiver_fn_map = {_REWRITE_FOR_INFERENCE_MODE: - input_receiver_fn_map[mode]} - export_tags = [tag_constants.SERVING, tag_constants.TPU] - mode = _REWRITE_FOR_INFERENCE_MODE if self._export_to_tpu: + input_receiver_fn_map = {_REWRITE_FOR_INFERENCE_MODE: + input_receiver_fn_map[mode]} + export_tags = [tag_constants.SERVING, tag_constants.TPU] + mode = _REWRITE_FOR_INFERENCE_MODE (super(TPUEstimator, self). 
_add_meta_graph_for_mode(builder, input_receiver_fn_map, -- GitLab From 6cc2741eb1c9b19742b32b8edda39090afbf5abf Mon Sep 17 00:00:00 2001 From: DavidNorman Date: Tue, 12 Jun 2018 09:32:53 +0100 Subject: [PATCH 0050/2038] Fix python lint errors --- tensorflow/compiler/tests/binary_ops_test.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 64eeed8312..823afbbbdc 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -687,11 +687,12 @@ class BinaryOpsTest(XLATestCase): np.float32(7), expected=np.array([[False], [False], [True]], dtype=np.bool)) if np.int64 in self.numeric_types: - self._testBinary( - less_op, - np.array([[10], [7], [2], [-1]], dtype=np.int64), - np.int64(7), - expected=np.array([[False], [False], [True], [True]], dtype=np.bool)) + self._testBinary( + less_op, + np.array([[10], [7], [2], [-1]], dtype=np.int64), + np.int64(7), + expected=np.array( + [[False], [False], [True], [True]], dtype=np.bool)) for less_equal_op in [math_ops.less_equal, (lambda x, y: x <= y)]: self._testBinary( -- GitLab From 9671a9d1981c9f1dba4899f98c397f7be9565e53 Mon Sep 17 00:00:00 2001 From: Grzegorz Pawelczak Date: Tue, 12 Jun 2018 11:06:44 +0100 Subject: [PATCH 0051/2038] [XLA] Reduce maxval in random ops test. Reduced maxval in testRandomUniformIsNotConstant since the current value causes an overflow when using a device which supports DT_HALF. 
--- tensorflow/compiler/tests/random_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py index f13dff9620..df458ba8b9 100644 --- a/tensorflow/compiler/tests/random_ops_test.py +++ b/tensorflow/compiler/tests/random_ops_test.py @@ -54,7 +54,7 @@ class RandomOpsTest(XLATestCase): def testRandomUniformIsNotConstant(self): def rng(dtype): - return random_ops.random_uniform(shape=[2], dtype=dtype, maxval=1000000) + return random_ops.random_uniform(shape=[2], dtype=dtype, maxval=100) for dtype in self._random_types(): self._testRngIsNotConstant(rng, dtype) -- GitLab From fb704f74c4f697a1d85cb18e24a48f1a86d0825b Mon Sep 17 00:00:00 2001 From: Grzegorz Pawelczak Date: Tue, 12 Jun 2018 18:25:58 +0100 Subject: [PATCH 0052/2038] [XLA] Skip float16 adam_tests. Adam tests compare the output of the computation against a hand written numpy solution which does not take into account the precision and hence fail for float16. 
--- tensorflow/compiler/tests/adam_test.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/compiler/tests/adam_test.py b/tensorflow/compiler/tests/adam_test.py index 3215dc36e5..ee56a38f94 100644 --- a/tensorflow/compiler/tests/adam_test.py +++ b/tensorflow/compiler/tests/adam_test.py @@ -52,6 +52,8 @@ class AdamOptimizerTest(XLATestCase): def testBasic(self): for dtype in self.float_types: + if dtype == np.float16: + continue with self.test_session(), self.test_scope(): variable_scope.get_variable_scope().set_use_resource(True) @@ -91,6 +93,8 @@ class AdamOptimizerTest(XLATestCase): def testTensorLearningRate(self): for dtype in self.float_types: + if dtype == np.float16: + continue with self.test_session(), self.test_scope(): variable_scope.get_variable_scope().set_use_resource(True) @@ -130,6 +134,8 @@ class AdamOptimizerTest(XLATestCase): def testSharing(self): for dtype in self.float_types: + if dtype == np.float16: + continue with self.test_session(), self.test_scope(): variable_scope.get_variable_scope().set_use_resource(True) -- GitLab From 56150c9829b79c2249a4b90087ce25b1e6624f0b Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 12 Jun 2018 15:37:42 -0700 Subject: [PATCH 0053/2038] code refactoring per Rasmus's suggestions on PR 19754 --- tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc | 11 ++++++----- tensorflow/core/kernels/mkl_conv_grad_input_ops.cc | 10 ++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index d12bccc02a..b3f1bdcc7d 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -84,7 +84,8 @@ template class MklConv2DBwdFilterPrimitive : public MklPrimitive { public: explicit MklConv2DBwdFilterPrimitive( - const MklConvBwdFilterParams& convBwdFilterDims) { + const MklConvBwdFilterParams& convBwdFilterDims) : + 
cpu_engine_(engine::cpu, 0) { context_.bwd_filter_stream.reset(new stream(stream::kind::eager)); // create conv primitive if (context_.conv_bwd_filter == nullptr) { @@ -203,9 +204,7 @@ class MklConv2DBwdFilterPrimitive : public MklPrimitive { diff_bias_md(nullptr), diff_dst_md(nullptr), bwd_filter_stream(nullptr) { } - } context_; - - engine cpu_engine_ = engine(engine::cpu, 0); + }; // Setup Conv2d backward filter (weights) primitives. void Setup(const MklConvBwdFilterParams& convBwdFilterDims) { @@ -290,8 +289,10 @@ class MklConv2DBwdFilterPrimitive : public MklPrimitive { } context_.bwd_filter_primitives.push_back(*context_.conv_bwd_filter); - return; } + + struct ConvBwdFilterContext context_; + engine cpu_engine_; }; template diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index e4b8564589..4d9493725d 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -84,7 +84,8 @@ template class MklConv2DBwdInputPrimitive : public MklPrimitive { public: explicit MklConv2DBwdInputPrimitive( - const MklConvBwdInputParams& convBwdInputDims) { + const MklConvBwdInputParams& convBwdInputDims) : + cpu_engine_(engine::cpu, 0) { context_.bwd_input_stream.reset(new stream(stream::kind::eager)); // create conv primitive @@ -169,9 +170,8 @@ class MklConv2DBwdInputPrimitive : public MklPrimitive { diff_src_md(nullptr), filter_md(nullptr), diff_dst_md(nullptr), bwd_input_stream(nullptr) { } - } context_; + }; - engine cpu_engine_ = engine(engine::cpu, 0); void Setup(const MklConvBwdInputParams& convBwdInputDims) { // create memory descriptors for convolution data w/ no specified format @@ -226,8 +226,10 @@ class MklConv2DBwdInputPrimitive : public MklPrimitive { *context_.filter_mem, *context_.diff_src_mem)); context_.bwd_input_primitives.push_back(*context_.conv_bwd_input); - return; } + + struct ConvBwdInputContext context_; + engine cpu_engine_; 
}; template -- GitLab From 9aca063ab5417fbda5217352330b62a4115286df Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 12 Jun 2018 16:05:44 -0700 Subject: [PATCH 0054/2038] code refactoring per Rasmus's suggestions on PR 19754 --- .../core/kernels/mkl_fused_batch_norm_op.cc | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index f4f6f8457d..b70724968c 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -702,7 +702,8 @@ template class MklFusedBatchNormFwdPrimitive : public MklPrimitive { public: explicit MklFusedBatchNormFwdPrimitive( - const MklBatchNormFwdParams& fwdParams) { + const MklBatchNormFwdParams& fwdParams) : + cpu_engine_(engine::cpu, 0) { context_.fwd_stream.reset( new mkldnn::stream(mkldnn::stream::kind::eager)); if (context_.bn_fwd == nullptr) @@ -750,7 +751,6 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { context_.mean_mem->set_data_handle(DummyData); context_.variance_mem->set_data_handle(DummyData); } - return; } memory::primitive_desc GetDstPd() const { @@ -791,7 +791,7 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { weights_mem(nullptr), dst_mem(nullptr), mean_mem(nullptr), variance_mem(nullptr), bn_fwd(nullptr), fwd_stream(nullptr) { } - } context_; + }; void Setup(const MklBatchNormFwdParams& fwdParams) { context_.flags = fwdParams.training ? 
use_scale_shift @@ -864,14 +864,14 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive { } context_.fwd_primitives.push_back(*context_.bn_fwd); - return; } mkldnn::memory::desc get_desc_data(const mkldnn::memory &m) const { return m.get_primitive_desc().desc().data; } - engine cpu_engine_ = engine(engine::cpu, 0); + struct BatchNormFwdContext context_; + engine cpu_engine_; }; template @@ -942,7 +942,8 @@ template class MklFusedBatchNormBwdPrimitive : public MklPrimitive { public: explicit MklFusedBatchNormBwdPrimitive( - const MklBatchNormBwdParams& bwdParams) { + const MklBatchNormBwdParams& bwdParams) : + cpu_engine_(engine::cpu, 0) { context_.bwd_stream.reset( new mkldnn::stream(mkldnn::stream::kind::eager)); if (context_.bn_bwd == nullptr) @@ -993,7 +994,6 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { context_.diff_weights_mem->set_data_handle(DummyData); } context_.diff_src_mem->set_data_handle(DummyData); - return; } mkldnn_memory_format_t GetSrcFmt() { @@ -1032,7 +1032,8 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { diff_dst_mem(nullptr), weights_mem(nullptr), diff_weights_mem(nullptr), diff_src_mem(nullptr), bwd_stream(nullptr) { } - } context_; + }; + void Setup(const MklBatchNormBwdParams& bwdParams) { context_.flags = bwdParams.training ? 
use_scale_shift : (use_scale_shift | use_global_stats); @@ -1090,10 +1091,10 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive { *context_.variance_mem, *context_.diff_dst_mem, *context_.weights_mem, *context_.diff_src_mem, *context_.diff_weights_mem)); context_.bwd_primitives.push_back(*context_.bn_bwd); - return; } - engine cpu_engine_ = engine(engine::cpu, 0); + struct BatchNormBwdContext context_; + engine cpu_engine_; }; template -- GitLab From ff9bf67219d99ac46983bcb601774f11feeaf343 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 12 Jun 2018 16:23:42 -0700 Subject: [PATCH 0055/2038] code refactoring per Rasmus's suggestions on PR 19754 --- .../core/kernels/mkl_pooling_ops_common.cc | 3 --- .../core/kernels/mkl_pooling_ops_common.h | 20 +++++++++++-------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc index df58a1edec..9e0fb1e2c9 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc @@ -92,7 +92,6 @@ void MklPoolingFwdPrimitive::Setup(const MklPoolingParams& fwdParams) { } context_.fwd_primitives.push_back(*context_.fwd); - return; } template @@ -115,7 +114,6 @@ void MklPoolingFwdPrimitive::Execute(const T* src_data, const T* dst_data, assert(ws != nullptr); context_.ws_mem->set_data_handle(DummyData); } - return; } template class MklPoolingFwdPrimitive; @@ -199,7 +197,6 @@ void MklPoolingBwdPrimitive::Execute(const T* diff_dst_data, assert(ws_data != nullptr); context_.ws_mem->set_data_handle(DummyData); } - return; } template class MklPoolingBwdPrimitive; diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h index f8d6b64b0f..3f724d7111 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.h +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h @@ -65,7 +65,8 @@ struct MklPoolingParams { 
template class MklPoolingFwdPrimitive : public MklPrimitive { public: - explicit MklPoolingFwdPrimitive(const MklPoolingParams& fwdParams) { + explicit MklPoolingFwdPrimitive(const MklPoolingParams& fwdParams) : + cpu_engine_(engine::cpu, 0) { context_.fwd_stream.reset(new stream(stream::kind::eager)); if (context_.fwd == nullptr) Setup(fwdParams); @@ -96,7 +97,6 @@ class MklPoolingFwdPrimitive : public MklPrimitive { private: void Setup(const MklPoolingParams& fwdParams); - struct PoolingFwdContext { // algorithm mkldnn::algorithm alg_kind; @@ -135,9 +135,10 @@ class MklPoolingFwdPrimitive : public MklPrimitive { dst_mem(nullptr), fwd_desc(nullptr), fwd_pd(nullptr), src_md(nullptr), dst_md(nullptr), fwd(nullptr), fwd_stream(nullptr) { } - } context_; + }; - engine cpu_engine_ = engine(engine::cpu, 0); + struct PoolingFwdContext context_; + engine cpu_engine_; }; template @@ -200,7 +201,8 @@ class MklPoolingFwdPrimitiveFactory : public MklPrimitiveFactory { template class MklPoolingBwdPrimitive : public MklPrimitive { public: - explicit MklPoolingBwdPrimitive(const MklPoolingParams& bwdParams) { + explicit MklPoolingBwdPrimitive(const MklPoolingParams& bwdParams) : + cpu_engine(engine::cpu, 0) { context_.bwd_stream.reset(new stream(stream::kind::eager)); if (context_.bwd == nullptr) Setup(bwdParams); @@ -238,6 +240,7 @@ class MklPoolingBwdPrimitive : public MklPrimitive { private: void Setup(const MklPoolingParams& bwdParams); + // Primitive reuse context for pooling bwd ops struct PoolingBwdContext { // algorithm @@ -280,9 +283,10 @@ class MklPoolingBwdPrimitive : public MklPrimitive { fwd_desc(nullptr), bwd_desc(nullptr), fwd_pd(nullptr), bwd_pd(nullptr), bwd(nullptr), bwd_stream(nullptr) { } - } context_; - // cpu engine - engine cpu_engine = engine(engine::cpu, 0); + }; + + struct PoolingBwdContext context_; + engine cpu_engine; }; template -- GitLab From fd44596bc4b3ea8c67838b728b450a44e35c1b89 Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 11 Jun 2018 
17:21:06 -0700 Subject: [PATCH 0056/2038] Merging --- tensorflow/tools/api/generator/BUILD | 24 ++++++ .../tools/api/generator/create_python_api.py | 54 +++++++++++-- tensorflow/tools/api/generator/doc_srcs.py | 65 +++++++++++++++ .../tools/api/generator/doc_srcs_test.py | 80 +++++++++++++++++++ 4 files changed, 217 insertions(+), 6 deletions(-) create mode 100644 tensorflow/tools/api/generator/doc_srcs.py create mode 100644 tensorflow/tools/api/generator/doc_srcs_test.py diff --git a/tensorflow/tools/api/generator/BUILD b/tensorflow/tools/api/generator/BUILD index f0c5877a90..3a28153e52 100644 --- a/tensorflow/tools/api/generator/BUILD +++ b/tensorflow/tools/api/generator/BUILD @@ -5,12 +5,21 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) +load("//tensorflow/tools/api/generator:api_gen.bzl", "TENSORFLOW_API_INIT_FILES") + +py_library( + name = "doc_srcs", + srcs = ["doc_srcs.py"], + srcs_version = "PY2AND3", +) + py_binary( name = "create_python_api", srcs = ["create_python_api.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ + ":doc_srcs", "//tensorflow/python:no_contrib", ], ) @@ -24,3 +33,18 @@ py_test( "//tensorflow/python:client_testlib", ], ) + +py_test( + name = "tensorflow_doc_srcs_test", + srcs = ["doc_srcs_test.py"], + args = [ + "--package=tensorflow.python", + ] + TENSORFLOW_API_INIT_FILES, + main = "doc_srcs_test.py", + srcs_version = "PY2AND3", + deps = [ + ":doc_srcs", + "//tensorflow/python:client_testlib", + "//tensorflow/python:no_contrib", + ], +) diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py index 9f210ad42b..31f287b7fe 100644 --- a/tensorflow/tools/api/generator/create_python_api.py +++ b/tensorflow/tools/api/generator/create_python_api.py @@ -25,6 +25,8 @@ import os import sys from tensorflow.python.util import tf_decorator +from tensorflow.python.util import tf_export +from tensorflow.tools.api.generator import doc_srcs 
_API_CONSTANTS_ATTR = '_tf_api_constants' @@ -36,10 +38,9 @@ _SYMBOLS_TO_SKIP_EXPLICITLY = { # would have side effects. 'tensorflow.python.platform.flags.FLAGS' } -_GENERATED_FILE_HEADER = """\"\"\"Imports for Python API. - -This file is MACHINE GENERATED! Do not edit. -Generated by: tensorflow/tools/api/generator/create_python_api.py script. +_GENERATED_FILE_HEADER = """# This file is MACHINE GENERATED! Do not edit. +# Generated by: tensorflow/tools/api/generator/create_python_api.py script. +\"\"\"%s \"\"\" from __future__ import print_function @@ -254,6 +255,44 @@ def get_module(dir_path, relative_to_dir): return dir_path.replace('/', '.').strip('.') +def get_module_docstring(module_name, package): + """Get docstring for the given module. + + This method looks for docstring in the following order: + 1. Checks if module has a docstring specified in doc_srcs. + 2. Checks if module has a docstring source module specified + in doc_srcs. If it does, gets docstring from that module. + 3. Checks if module with module_name exists under base package. + If it does, gets docstring from that module. + 4. Returns a default docstring. + + Args: + module_name: module name relative to tensorflow + (excluding 'tensorflow.' prefix) to get a docstring for. + package: Base python package containing python with target tf_export + decorators. + + Returns: + One-line docstring to describe the module. + """ + # Module under base package to get a docstring from. + docstring_module_name = module_name + + if module_name in doc_srcs.TENSORFLOW_DOC_SOURCES: + docsrc = doc_srcs.TENSORFLOW_DOC_SOURCES[module_name] + if docsrc.docstring: + return docsrc.docstring + if docsrc.docstring_module_name: + docstring_module_name = docsrc.docstring_module_name + + docstring_module_name = package + '.' 
+ docstring_module_name + if (docstring_module_name in sys.modules and + sys.modules[docstring_module_name].__doc__): + return sys.modules[docstring_module_name].__doc__ + + return 'Public API for tf.%s namespace.' % module_name + + def create_api_files( output_files, package, root_init_template, output_dir): """Creates __init__.py files for the Python API. @@ -296,7 +335,10 @@ def create_api_files( continue contents = '' if module or not root_init_template: - contents = _GENERATED_FILE_HEADER + text + _GENERATED_FILE_FOOTER + contents = ( + _GENERATED_FILE_HEADER % + get_module_docstring(module, package) + text + + _GENERATED_FILE_FOOTER) else: # Read base init file with open(root_init_template, 'r') as root_init_template_file: @@ -309,7 +351,7 @@ def create_api_files( raise ValueError( 'Missing outputs for python_api_gen genrule:\n%s.' 'Make sure all required outputs are in the ' - 'tensorflow/tools/api/generator/BUILD file.' % + 'tensorflow/tools/api/generator/api_gen.bzl file.' % ',\n'.join(sorted(missing_output_files))) diff --git a/tensorflow/tools/api/generator/doc_srcs.py b/tensorflow/tools/api/generator/doc_srcs.py new file mode 100644 index 0000000000..74f6db98fd --- /dev/null +++ b/tensorflow/tools/api/generator/doc_srcs.py @@ -0,0 +1,65 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Specifies sources of doc strings for API modules.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + + +# Specifies docstring source for a module. +# Only one of docstring or docstring_module_name should be set. +# * If docstring is set, then we will use this docstring when +# for the module. +# * If docstring_module_name is set, then we will copy the docstring +# from docstring source module. +DocSource = collections.namedtuple( + 'DocSource', ['docstring', 'docstring_module_name']) +# Each attribute of DocSource is optional. +DocSource.__new__.__defaults__ = (None,) * len(DocSource._fields) + +TENSORFLOW_DOC_SOURCES = { + 'app': DocSource(docstring_module_name='platform.app'), + 'compat': DocSource(docstring_module_name='util.compat'), + 'distributions': DocSource( + docstring_module_name='ops.distributions.distributions'), + 'bitwise': DocSource(docstring_module_name='ops.bitwise_ops'), + 'errors': DocSource(docstring_module_name='framework.errors'), + 'gfile': DocSource(docstring_module_name='platform.gfile'), + 'graph_util': DocSource(docstring_module_name='framework.graph_util'), + 'image': DocSource(docstring_module_name='ops.image_ops'), + 'keras.estimator': DocSource(docstring_module_name='estimator.keras'), + 'linalg': DocSource(docstring_module_name='ops.linalg_ops'), + 'logging': DocSource(docstring_module_name='ops.logging_ops'), + 'losses': DocSource(docstring_module_name='ops.losses.losses'), + 'manip': DocSource(docstring_module_name='ops.manip_ops'), + 'math': DocSource(docstring_module_name='ops.math_ops'), + 'metrics': DocSource(docstring_module_name='ops.metrics'), + 'nn': DocSource(docstring_module_name='ops.nn_ops'), + 'nn.rnn_cell': DocSource(docstring_module_name='ops.rnn_cell'), + 'python_io': DocSource(docstring_module_name='lib.io.python_io'), + 'resource_loader': 
DocSource( + docstring_module_name='platform.resource_loader'), + 'sets': DocSource(docstring_module_name='ops.sets'), + 'sparse': DocSource(docstring_module_name='ops.sparse_ops'), + 'spectral': DocSource(docstring_module_name='ops.spectral_ops'), + 'strings': DocSource(docstring_module_name='ops.string_ops'), + 'sysconfig': DocSource(docstring_module_name='platform.sysconfig'), + 'test': DocSource(docstring_module_name='platform.test'), + 'train': DocSource(docstring_module_name='training.training'), + 'train.queue_runner': DocSource( + docstring_module_name='training.queue_runner'), +} diff --git a/tensorflow/tools/api/generator/doc_srcs_test.py b/tensorflow/tools/api/generator/doc_srcs_test.py new file mode 100644 index 0000000000..9ba95a3439 --- /dev/null +++ b/tensorflow/tools/api/generator/doc_srcs_test.py @@ -0,0 +1,80 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= +"""Tests for tensorflow.tools.api.generator.doc_srcs.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import importlib +import sys + +from tensorflow.python.platform import test +from tensorflow.tools.api.generator import doc_srcs + + +FLAGS = None + + +class DocSrcsTest(test.TestCase): + + def testModulesAreValidAPIModules(self): + for module_name in doc_srcs.TENSORFLOW_DOC_SOURCES: + # Convert module_name to corresponding __init__.py file path. + file_path = module_name.replace('.', '/') + if file_path: + file_path += '/' + file_path += '__init__.py' + + if file_path not in FLAGS.outputs: + self.assertFalse('%s is not a valid API module' % module_name) + + def testHaveDocstringOrDocstringModule(self): + for module_name, docsrc in doc_srcs.TENSORFLOW_DOC_SOURCES.items(): + if docsrc.docstring and docsrc.docstring_module_name: + self.assertFalse( + '%s contains DocSource has both a docstring and a ' + 'docstring_module_name. ' + 'Only one of "docstring" or "docstring_module_name" should be set.' + % (module_name)) + + def testDocstringModulesAreValidModules(self): + for _, docsrc in doc_srcs.TENSORFLOW_DOC_SOURCES.items(): + if docsrc.docstring_module_name: + doc_module_name = '.'.join([ + FLAGS.package, docsrc.docstring_module_name]) + if doc_module_name not in sys.modules: + self.assertFalse( + 'docsources_module %s is not a valid module under %s.' 
% + (docsrc.docstring_module_name, FLAGS.package)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + 'outputs', metavar='O', type=str, nargs='+', + help='create_python_api output files.') + parser.add_argument( + '--package', type=str, + help='Base package that imports modules containing the target tf_export ' + 'decorators.') + FLAGS, unparsed = parser.parse_known_args() + + importlib.import_module(FLAGS.package) + + # Now update argv, so that unittest library does not get confused. + sys.argv = [sys.argv[0]] + unparsed + test.main() -- GitLab From e042e3e051d3bd6bfb63dfd4ad407a82f7d1dacc Mon Sep 17 00:00:00 2001 From: Anna R Date: Tue, 12 Jun 2018 17:47:58 -0700 Subject: [PATCH 0057/2038] Remove unused tf_export import --- tensorflow/tools/api/generator/create_python_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py index 31f287b7fe..e3ab056efc 100644 --- a/tensorflow/tools/api/generator/create_python_api.py +++ b/tensorflow/tools/api/generator/create_python_api.py @@ -25,7 +25,6 @@ import os import sys from tensorflow.python.util import tf_decorator -from tensorflow.python.util import tf_export from tensorflow.tools.api.generator import doc_srcs -- GitLab From f055a9f2f21154140785b9da7c3b2eae88e65623 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Tue, 12 Jun 2018 18:09:35 -0700 Subject: [PATCH 0058/2038] Check to ensure the Cloud TPU is ready before resolving. Cherry picking this into the TF 1.9 release. 
PiperOrigin-RevId: 200095692 Previous commit: 32c8013f0ab3feb139648ae759e2d0168fb5dc95 --- .../python/training/tpu_cluster_resolver.py | 3 ++ .../training/tpu_cluster_resolver_test.py | 44 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index 880fca4ea6..935ad5ff37 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -255,6 +255,9 @@ class TPUClusterResolver(ClusterResolver): request = self._service.projects().locations().nodes().get(name=full_name) response = request.execute() + if 'state' in response and response['state'] != 'READY': + raise RuntimeError('TPU "%s" is not yet ready; state: "%s"' % + (self._tpu, response['state'])) if 'health' in response and response['health'] != 'HEALTHY': raise RuntimeError('TPU "%s" is unhealthy: "%s"' % (self._tpu, response['health'])) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index 5fac55fd02..7e002cc72f 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -157,6 +157,50 @@ class TPUClusterResolverTest(test.TestCase): job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) + + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', + mock_request_compute_metadata) + def testUnhealthyCloudTpu(self): + tpu_map = { + 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { + 'ipAddress': '10.1.2.3', + 'port': '8470', + 'health': 'UNHEALTHY' + } + } + + tpu_cluster_resolver = 
TPUClusterResolver( + project=None, + zone=None, + tpu='test-tpu-1', + coordinator_name=None, + credentials=None, + service=self.mock_service_client(tpu_map=tpu_map)) + + with self.assertRaises(RuntimeError): + tpu_cluster_resolver.cluster_spec() + + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', + mock_request_compute_metadata) + def testNotReadyCloudTpu(self): + tpu_map = { + 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { + 'ipAddress': '10.1.2.3', + 'port': '8470', + 'state': 'CREATING' + } + } + + tpu_cluster_resolver = TPUClusterResolver( + project=None, + zone=None, + tpu='test-tpu-1', + coordinator_name=None, + credentials=None, + service=self.mock_service_client(tpu_map=tpu_map)) + + with self.assertRaises(RuntimeError): + tpu_cluster_resolver.cluster_spec() def testSimpleSuccessfulRetrieval(self): tpu_map = { -- GitLab From 9a087a42293be8342570039d2c6d329a0589b773 Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Wed, 13 Jun 2018 00:30:09 -0700 Subject: [PATCH 0059/2038] Update tensorboard dependency to 1.9.x --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 97f625e7e9..92a1465cea 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -55,7 +55,7 @@ REQUIRED_PACKAGES = [ 'six >= 1.10.0', 'protobuf >= 3.4.0', 'setuptools <= 39.1.0', - 'tensorboard >= 1.8.0, < 1.9.0', + 'tensorboard >= 1.9.0, < 1.10.0', 'termcolor >= 1.1.0', ] -- GitLab From b1d0048f2be83d6c6f7e1be996ef9c8358922aa6 Mon Sep 17 00:00:00 2001 From: Pete Warden Date: Wed, 13 Jun 2018 01:06:50 -0700 Subject: [PATCH 0060/2038] Documentation for Raspberry Pi installation --- tensorflow/docs_src/install/index.md | 2 + .../docs_src/install/install_raspbian.md | 317 ++++++++++++++++++ 2 files changed, 319 insertions(+) create mode 100644 
tensorflow/docs_src/install/install_raspbian.md diff --git a/tensorflow/docs_src/install/index.md b/tensorflow/docs_src/install/index.md index 4f85383925..c2e5a991d4 100644 --- a/tensorflow/docs_src/install/index.md +++ b/tensorflow/docs_src/install/index.md @@ -6,6 +6,7 @@ operating systems: * macOS 10.12.6 (Sierra) or later. * Ubuntu 16.04 or later * Windows 7 or later. + * Raspbian 9.0 or later. Although you might be able to install TensorFlow on other laptop or desktop systems, we only support (and only fix issues in) the preceding configurations. @@ -16,6 +17,7 @@ that enables you to write applications in Python: * @{$install_linux$Installing TensorFlow on Ubuntu} * @{$install_mac$Installing TensorFlow on macOS} * @{$install_windows$Installing TensorFlow on Windows} + * @{$install_raspbian$Installing TensorFlow on a Raspberry Pi} * @{$install_sources$Installing TensorFlow from Sources} Many aspects of the Python TensorFlow API changed from version 0.n to 1.0. diff --git a/tensorflow/docs_src/install/install_raspbian.md b/tensorflow/docs_src/install/install_raspbian.md new file mode 100644 index 0000000000..2f425162a1 --- /dev/null +++ b/tensorflow/docs_src/install/install_raspbian.md @@ -0,0 +1,317 @@ +# Installing TensorFlow on Raspbian + +This guide explains how to install TensorFlow on a Raspberry Pi running +Raspbian. Although these instructions might also work on other Pi variants, we +have only tested (and we only support) these instructions on machines meeting +the following requirements: + +* Raspberry Pi devices running Raspbian 9.0 or higher + +## Determine how to install TensorFlow + +You must pick the mechanism by which you install TensorFlow. The supported +choices are as follows: + +* "Native" pip. +* Cross-compiling from sources. + +**We recommend pip installation.** + +## Installing with native pip + +We have uploaded the TensorFlow binaries to piwheels.org. Therefore, you can +install TensorFlow through pip. 
+ +The [REQUIRED_PACKAGES section of +setup.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/setup.py) +lists the packages that pip will install or upgrade. + +### Prerequisite: Python + +In order to install TensorFlow, your system must contain one of the following +Python versions: + +* Python 2.7 +* Python 3.4+ + +If your system does not already have one of the preceding Python versions, +[install](https://wiki.python.org/moin/BeginnersGuide/Download) it now. It +should already be included when Raspbian was installed though, so no extra steps +should be needed. + +### Prerequisite: pip + +[Pip](https://en.wikipedia.org/wiki/Pip_\(package_manager\)) installs and +manages software packages written in Python. If you intend to install with +native pip, then one of the following flavors of pip must be installed on your +system: + +* `pip3`, for Python 3.n (preferred). +* `pip`, for Python 2.7. + +`pip` or `pip3` was probably installed on your system when you installed Python. +To determine whether pip or pip3 is actually installed on your system, issue one +of the following commands: + +
$ pip3 -V # for Python 3.n
+$ pip -V  # for Python 2.7
+ +If it gives the error "Command not found", then the package has not been +installed yet. To install it for the first time, run: + +
$ sudo apt-get install python3-pip # for Python 3.n
+sudo apt-get install python-pip # for Python 2.7
+ +You can find more help on installing and upgrading pip in +[the Raspberry Pi documentation](https://www.raspberrypi.org/documentation/linux/software/python.md). + +### Prerequisite: Atlas + +[Atlas](http://math-atlas.sourceforge.net/) is a linear algebra library that +numpy depends on, and so needs to be installed before TensorFlow. To add it to +your system, run the following command: + +
$ sudo apt install libatlas-base-dev
+ +### Install TensorFlow + +Assuming the prerequisite software is installed on your Pi, install TensorFlow +by invoking **one** of the following commands: + +
 $ pip3 install tensorflow     # Python 3.n
+     $ pip install tensorflow      # Python 2.7
+ +This can take some time on certain platforms like the Pi Zero, where some Python +packages like scipy that TensorFlow depends on need to be compiled before the +installation can complete. The Python 3 version will typically be faster to +install because piwheels.org has pre-built versions of the dependencies +available, so this is our recommended option. + +### Next Steps + +After installing TensorFlow, [validate your +installation](#ValidateYourInstallation) to confirm that the installation worked +properly. + +### Uninstalling TensorFlow + +To uninstall TensorFlow, issue one of the following commands: + +
$ pip uninstall tensorflow
+$ pip3 uninstall tensorflow 
+ +## Cross-compiling from sources + +Cross-compilation means building on a different machine than you'll be +deploying on. Since Raspberry Pis have only limited RAM and comparatively slow +processors, and TensorFlow has a large amount of source code to compile, it's +easier to use a macOS or Linux desktop or laptop to handle the build process. +Because it can take over 24 hours to build on a Pi, and requires external swap +space to cope with the memory shortage, we recommend using cross-compilation if +you do need to compile TensorFlow from source. To make the dependency management +process easier, we also recommend using Docker to help simplify building. + +Note that we provide well-tested, pre-built TensorFlow binaries for Raspbian +systems. So, don't build a TensorFlow binary yourself unless you are very +comfortable building complex packages from source and dealing with the +inevitable aftermath should things not go exactly as documented. + +### Prerequisite: Docker + +Install Docker on your machine as described in the [Docker +documentation](https://docs.docker.com/engine/installation/#/on-macos-and-windows). + +### Clone the TensorFlow repository + +Start the process of building TensorFlow by cloning a TensorFlow repository. + +To clone **the latest** TensorFlow repository, issue the following command: + +
$ git clone https://github.com/tensorflow/tensorflow 
+ +The preceding git clone command creates a subdirectory named +`tensorflow`. After cloning, you may optionally build a **specific branch** +(such as a release branch) by invoking the following commands: + +
+$ cd tensorflow
+$ git checkout Branch # where Branch is the desired branch
+
+ +For example, to work with the `r1.0` release instead of the master release, +issue the following command: + +
$ git checkout r1.0
+ +### Build from source + +To compile TensorFlow and produce a binary pip can install, do the following: + +1. Start a terminal. +2. Navigate to the directory containing the tensorflow source code. +3. Run a command to cross-compile the library, for example: + +
$ CI_DOCKER_EXTRA_PARAMS="-e CI_BUILD_PYTHON=python3 -e CROSSTOOL_PYTHON_INCLUDE_PATH=/usr/include/python3.4" \
+tensorflow/tools/ci_build/ci_build.sh PI-PYTHON3 tensorflow/tools/ci_build/pi/build_raspberry_pi.sh
+ 
+ +This will build a pip .whl file for Python 3.4, with Arm v7 instructions that +will only work on the Pi models 2 or 3. These NEON instructions are required for +the fastest operation on those devices, but you can build a library that will +run across all Pi devices by passing `PI_ONE` at the end of the command line. +You can also target Python 2.7 by omitting the initial docker parameters. Here's +an example of building for Python 2.7 and Raspberry Pi model Zero or One +devices: + +
$ tensorflow/tools/ci_build/ci_build.sh PI tensorflow/tools/ci_build/pi/build_raspberry_pi.sh PI_ONE
+ +This will take some time to complete, typically twenty or thirty minutes, and +should produce a .whl file in an output-artifacts sub-folder inside your source +tree at the end. This wheel file can be installed through pip or pip3 (depending +on your Python version) by copying it to a Raspberry Pi and running a terminal +command like this (with the name of your actual file substituted): + +
$ pip3 install tensorflow-1.9.0-cp34-none-linux_armv7l.whl
+ +### Troubleshooting the build + +The build script uses Docker internally to create a Linux virtual machine to +handle the compilation. If you do have problems running the script, first check +that you're able to run Docker tests like `docker run hello-world` on your +system. + +If you're building from the latest development branch, try syncing to an older +version that's known to work, for example release 1.9, with a command like this: + +
$ git checkout r1.9
+ + + +## Validate your installation + +To validate your TensorFlow installation, do the following: + +1. Ensure that your environment is prepared to run TensorFlow programs. +2. Run a short TensorFlow program. + +### Prepare your environment + +If you installed on native pip, Virtualenv, or Anaconda, then do the following: + +1. Start a terminal. +2. If you installed TensorFlow source code, navigate to any directory *except* + one containing TensorFlow source code. + +### Run a short TensorFlow program + +Invoke python from your shell as follows: + +
$ python
+ +Enter the following short program inside the python interactive shell: + +```python +# Python +import tensorflow as tf +hello = tf.constant('Hello, TensorFlow!') +sess = tf.Session() +print(sess.run(hello)) +``` + +If the system outputs the following, then you are ready to begin writing +TensorFlow programs: + +
Hello, TensorFlow!
+ +If you're running with Python 3.5, you may see a warning when you first import +TensorFlow. This is not an error, and TensorFlow should continue to run with no +problems, despite the log message. + +If the system outputs an error message instead of a greeting, see [Common +installation problems](#common_installation_problems). + +If you are new to machine learning, we recommend the [Machine Learning Crash +Course](https://developers.google.com/machine-learning/crash-course). + +If you are experienced with machine learning but new to TensorFlow, see +@{$get_started/eager}. + +## Common installation problems + +We are relying on Stack Overflow to document TensorFlow installation problems +and their remedies. The following table contains links to Stack Overflow answers +for some common installation problems. If you encounter an error message or +other installation problem not listed in the following table, search for it on +Stack Overflow. If Stack Overflow doesn't show the error message, ask a new +question about it on Stack Overflow and specify the `tensorflow` tag. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Stack Overflow Link Error Message
42006320
ImportError: Traceback (most recent call last):
+File ".../tensorflow/core/framework/graph_pb2.py", line 6, in 
+from google.protobuf import descriptor as _descriptor
+ImportError: cannot import name 'descriptor'
+
33623453
IOError: [Errno 2] No such file or directory:
+  '/tmp/pip-o6Tpui-build/setup.py'
+
35190574
SSLError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify
+  failed
42009190
+  Installing collected packages: setuptools, protobuf, wheel, numpy, tensorflow
+  Found existing installation: setuptools 1.1.6
+  Uninstalling setuptools-1.1.6:
+  Exception:
+  ...
+  [Errno 1] Operation not permitted:
+  '/tmp/pip-a1DXRT-uninstall/.../lib/python/_markerlib' 
33622019
ImportError: No module named copyreg
37810228During a pip install operation, the system returns: +
OSError: [Errno 1] Operation not permitted
+
33622842An import tensorflow statement triggers an error such as the + following:
Traceback (most recent call last):
+  File "", line 1, in 
+  File "/usr/local/lib/python2.7/site-packages/tensorflow/__init__.py",
+    line 4, in 
+    from tensorflow.python import *
+    ...
+  File "/usr/local/lib/python2.7/site-packages/tensorflow/core/framework/tensor_shape_pb2.py",
+    line 22, in 
+    serialized_pb=_b('\n,tensorflow/core/framework/tensor_shape.proto\x12\ntensorflow\"d\n\x10TensorShapeProto\x12-\n\x03\x64im\x18\x02
+      \x03(\x0b\x32
+      .tensorflow.TensorShapeProto.Dim\x1a!\n\x03\x44im\x12\x0c\n\x04size\x18\x01
+      \x01(\x03\x12\x0c\n\x04name\x18\x02 \x01(\tb\x06proto3')
+  TypeError: __init__() got an unexpected keyword argument 'syntax'
+
-- GitLab From 76b8b01740233ff289d70a0d516c6e0ac0e6b042 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 11 Jun 2018 11:55:34 -0700 Subject: [PATCH 0061/2038] Use the Keras session for saving/loading in TensorFlow format Fixes issues when there's no default session PiperOrigin-RevId: 200088574 --- tensorflow/python/keras/engine/network.py | 10 +++- tensorflow/python/keras/engine/saving_test.py | 52 +++++++++++++------ 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/tensorflow/python/keras/engine/network.py b/tensorflow/python/keras/engine/network.py index 9dbf94a276..3d567b8378 100644 --- a/tensorflow/python/keras/engine/network.py +++ b/tensorflow/python/keras/engine/network.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import copy +import functools import json import os import weakref @@ -1264,7 +1265,11 @@ class Network(base_layer.Layer): with h5py.File(filepath, 'w') as f: saving.save_weights_to_hdf5_group(f, self.layers) else: - self._checkpointable_saver.save(filepath) + if context.executing_eagerly(): + session = None + else: + session = backend.get_session() + self._checkpointable_saver.save(filepath, session=session) def load_weights(self, filepath, by_name=False): """Loads all layer weights, either from a TensorFlow or an HDF5 weight file. 
@@ -1324,7 +1329,8 @@ class Network(base_layer.Layer): 'loading TensorFlow-formatted weights (got by_name=True to ' 'load_weights).') if not context.executing_eagerly(): - finalizer = status.run_restore_ops + session = backend.get_session() + finalizer = functools.partial(status.run_restore_ops, session=session) if self.built: finalizer() else: diff --git a/tensorflow/python/keras/engine/saving_test.py b/tensorflow/python/keras/engine/saving_test.py index 30bcd3d185..b5448a9be1 100644 --- a/tensorflow/python/keras/engine/saving_test.py +++ b/tensorflow/python/keras/engine/saving_test.py @@ -404,26 +404,27 @@ class TestWholeModelSaving(test.TestCase): os.remove(fname) def test_saving_lambda_numpy_array_arguments(self): - if h5py is None: - self.skipTest('h5py required to run this test') + with self.test_session(): + if h5py is None: + self.skipTest('h5py required to run this test') - mean = np.random.random((4, 2, 3)) - std = np.abs(np.random.random((4, 2, 3))) + 1e-5 - inputs = keras.layers.Input(shape=(4, 2, 3)) - output = keras.layers.Lambda(lambda image, mu, std: (image - mu) / std, - arguments={'mu': mean, 'std': std})(inputs) - model = keras.models.Model(inputs, output) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) + mean = np.random.random((4, 2, 3)) + std = np.abs(np.random.random((4, 2, 3))) + 1e-5 + inputs = keras.layers.Input(shape=(4, 2, 3)) + output = keras.layers.Lambda(lambda image, mu, std: (image - mu) / std, + arguments={'mu': mean, 'std': std})(inputs) + model = keras.models.Model(inputs, output) + model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - fd, fname = tempfile.mkstemp('.h5') - keras.models.save_model(model, fname) + fd, fname = tempfile.mkstemp('.h5') + keras.models.save_model(model, fname) - model = keras.models.load_model(fname) - os.close(fd) - os.remove(fname) + model = keras.models.load_model(fname) + os.close(fd) + os.remove(fname) - self.assertAllClose(mean, model.layers[1].arguments['mu']) - 
self.assertAllClose(std, model.layers[1].arguments['std']) + self.assertAllClose(mean, model.layers[1].arguments['mu']) + self.assertAllClose(std, model.layers[1].arguments['std']) def test_saving_model_with_long_layer_names(self): if h5py is None: @@ -580,6 +581,25 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase): # Indirectly tests that the user is prompted model.save_weights(prefix, save_format='tensorflow', overwrite=False) + def test_no_default_session(self): + with ops.Graph().as_default(): + self.assertFalse(ops.get_default_session()) + data = np.random.random((1000, 32)).astype(np.float32) + labels = np.random.random((1000, 10)).astype(np.float32) + + model = keras.models.Sequential([ + keras.layers.Dense(10, activation='softmax'), + keras.layers.Dense(10, activation='softmax')]) + + model.compile(optimizer=training_module.RMSPropOptimizer(0.001), + loss='categorical_crossentropy', + metrics=['accuracy']) + + model.fit(data, labels) + fname = os.path.join(self.get_temp_dir(), 'weights', 'ckpt') + model.save_weights(fname) + model.load_weights(fname) + def test_no_graph_pollution(self): with context.graph_mode(): graph = ops.Graph() -- GitLab From 50ba6dd3a182c9578bc10cb2a21d7914a1e7bac1 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 11 Jun 2018 10:42:15 -0700 Subject: [PATCH 0062/2038] Don't call back into python during insert (which will leave the set in a broken condition if the runtime decides to let another thread run). Thank you for finding the bug. The watched_variables_ set should not really require a lock since all our functions hold the GIL (verified by looking at the generated SWIG). The reason that there was a concurrent access to the set is that the insert was calling back into python (which might release the GIL and let another thread run, which will also attempt to insert a variable and break the set). 
I included the lock to be safe though, since its non-trivial to verify without looking at the generated swig wrappers that the GIL is held. PiperOrigin-RevId: 200074843 --- tensorflow/python/eager/pywrap_tfe_src.cc | 82 ++++++++++++----------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index e3ce0ef9d0..52b3268903 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -873,22 +873,6 @@ static tensorflow::DataType FastTensorDtype(PyObject* tensor) { return static_cast(id); } -static tensorflow::int64 FastHandleId(PyObject* variable) { - PyObject* handle = PyObject_GetAttrString(variable, "handle"); - if (handle == nullptr) { - return -1; - } - tensorflow::int64 id = FastTensorId(handle); - Py_DECREF(handle); - return id; -} - -struct CompareByHandleId { - bool operator()(PyObject* lhs, PyObject* rhs) { - return FastHandleId(lhs) < FastHandleId(rhs); - } -}; - class GradientTape : public tensorflow::eager::GradientTape { public: @@ -897,35 +881,63 @@ class GradientTape persistent) {} virtual ~GradientTape() { - for (PyObject* v : watched_variables_) { - Py_DECREF(v); + for (const IdAndVariable& v : watched_variables_) { + Py_DECREF(v.variable); } } void WatchVariable(PyObject* v) { - auto insert_result = watched_variables_.insert(v); - if (insert_result.second) { - // Only increment the reference count if we aren't already watching this - // variable. 
- Py_INCREF(v); - } - PyObject* handle = PyObject_GetAttrString(v, "handle"); + tensorflow::Safe_PyObjectPtr handle(PyObject_GetAttrString(v, "handle")); if (handle == nullptr) { return; } - tensorflow::int64 id = FastTensorId(handle); - Py_DECREF(handle); + tensorflow::int64 id = FastTensorId(handle.get()); + if (!PyErr_Occurred()) { this->Watch(id); } + + tensorflow::mutex_lock l(watched_variables_mu_); + auto insert_result = watched_variables_.emplace(id, v); + + if (insert_result.second) { + // Only increment the reference count if we aren't already watching this + // variable. + Py_INCREF(v); + } } - const std::set WatchedVariables() { - return watched_variables_; + PyObject* GetVariablesAsPyTuple() { + tensorflow::mutex_lock l(watched_variables_mu_); + PyObject* result = PyTuple_New(watched_variables_.size()); + Py_ssize_t pos = 0; + for (const IdAndVariable& id_and_variable : watched_variables_) { + PyTuple_SET_ITEM(result, pos++, id_and_variable.variable); + Py_INCREF(id_and_variable.variable); + } + return result; } private: - std::set watched_variables_; + // We store an IdAndVariable in the map since the map needs to be locked + // during insert, but should not call back into python during insert to avoid + // deadlocking with the GIL. 
+ struct IdAndVariable { + tensorflow::int64 id; + PyObject* variable; + + IdAndVariable(tensorflow::int64 id, PyObject* variable) + : id(id), variable(variable) {} + }; + struct CompareById { + bool operator()(const IdAndVariable& lhs, const IdAndVariable& rhs) { + return lhs.id < rhs.id; + } + }; + + tensorflow::mutex watched_variables_mu_; + std::set watched_variables_ + GUARDED_BY(watched_variables_mu_); }; typedef struct { @@ -1217,15 +1229,7 @@ void TFE_Py_TapeSetWatchVariable(PyObject* variable) { } PyObject* TFE_Py_TapeWatchedVariables(PyObject* tape) { - const auto& watched_variables = - reinterpret_cast(tape)->tape->WatchedVariables(); - PyObject* result = PyTuple_New(watched_variables.size()); - Py_ssize_t pos = 0; - for (PyObject* variable : watched_variables) { - PyTuple_SET_ITEM(result, pos++, variable); - Py_INCREF(variable); - } - return result; + return reinterpret_cast(tape)->tape->GetVariablesAsPyTuple(); } namespace { -- GitLab From ec769c7ec368adf90aaa0b6d2a97525da14e1a37 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Mon, 11 Jun 2018 16:27:12 -0700 Subject: [PATCH 0063/2038] Remove memory leak in read variable call, and record gradient call. Fix #19385 PiperOrigin-RevId: 200132949 --- tensorflow/python/eager/pywrap_tfe_src.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 52b3268903..6c9481c3af 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1873,6 +1873,8 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, delete backward_function; }); + Py_DECREF(num_inputs); + Py_RETURN_NONE; } @@ -1931,8 +1933,10 @@ bool ReadVariableOp(const FastPathOpExecInfo& parent_op_exec_info, Py_INCREF(output->get()); // stay alive after since tuple steals. 
PyTuple_SET_ITEM(outputs.get(), 0, output->get()); - if (!RecordGradient(GetPythonObjectFromString("ReadVariableOp"), - inputs.get(), Py_None, outputs.get(), Py_None)) { + tensorflow::Safe_PyObjectPtr op_string( + GetPythonObjectFromString("ReadVariableOp")); + if (!RecordGradient(op_string.get(), inputs.get(), Py_None, outputs.get(), + Py_None)) { return false; } } -- GitLab From c77fead531bc3756d765ba90e2e549abd7adf320 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 13 Jun 2018 15:46:12 -0700 Subject: [PATCH 0064/2038] Make GCS ops work in open source --- tensorflow/contrib/cloud/__init__.py | 5 +++-- tensorflow/contrib/cloud/kernels/BUILD | 1 + tensorflow/core/platform/cloud/gcs_file_system.cc | 4 +++- tensorflow/core/platform/default/build_config.bzl | 2 ++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/cloud/__init__.py b/tensorflow/contrib/cloud/__init__.py index a6e13ea3ae..ef7aa7624c 100644 --- a/tensorflow/contrib/cloud/__init__.py +++ b/tensorflow/contrib/cloud/__init__.py @@ -27,8 +27,9 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ 'BigQueryReader', - 'ConfigureColabSession', - 'ConfigureGcs', + 'BlockCacheParams', + 'configure_colab_session', + 'configure_gcs', 'ConfigureGcsHook', ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/cloud/kernels/BUILD b/tensorflow/contrib/cloud/kernels/BUILD index 40160706f7..1311063ec0 100644 --- a/tensorflow/contrib/cloud/kernels/BUILD +++ b/tensorflow/contrib/cloud/kernels/BUILD @@ -79,6 +79,7 @@ tf_kernel_library( srcs = ["gcs_config_ops.cc"], visibility = ["//tensorflow:internal"], deps = [ + "//tensorflow/contrib/cloud:gcs_config_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/platform/cloud:curl_http_request", diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 22ae6121e0..803b08f1a3 100644 --- 
a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -804,7 +804,9 @@ void GcsFileSystem::ResetFileBlockCache(size_t block_size_bytes, mutex_lock l(block_cache_lock_); file_block_cache_ = MakeFileBlockCache(block_size_bytes, max_bytes, max_staleness_secs); - stats_->Configure(this, &throttle_, file_block_cache_.get()); + if (stats_) { + stats_->Configure(this, &throttle_, file_block_cache_.get()); + } } // A helper function to build a FileBlockCache for GcsFileSystem. diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 9e52ba344a..f12732b434 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -633,6 +633,7 @@ def tf_additional_cloud_op_deps(): "//tensorflow:with_gcp_support_ios_override": [], "//tensorflow:with_gcp_support": [ "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib", + "//tensorflow/contrib/cloud:gcs_config_ops_op_lib", ], "//conditions:default": [], }) @@ -645,6 +646,7 @@ def tf_additional_cloud_kernel_deps(): "//tensorflow:with_gcp_support_ios_override": [], "//tensorflow:with_gcp_support": [ "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops", + "//tensorflow/contrib/cloud/kernels:gcs_config_ops", ], "//conditions:default": [], }) -- GitLab From f9a44a69c35dcf7f1c0f42e1ae9971bae0148099 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 13 Jun 2018 18:05:39 -0700 Subject: [PATCH 0065/2038] Update the docs and api_def. 
--- .../contrib/cloud/ops/gcs_config_ops.cc | 42 +------------------ .../api_def_GcsConfigureBlockCache.pbtxt | 9 ++++ .../api_def_GcsConfigureCredentials.pbtxt | 33 +++++++++++++++ 3 files changed, 44 insertions(+), 40 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_GcsConfigureBlockCache.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_GcsConfigureCredentials.pbtxt diff --git a/tensorflow/contrib/cloud/ops/gcs_config_ops.cc b/tensorflow/contrib/cloud/ops/gcs_config_ops.cc index 9cf85f5f18..5e31a15498 100644 --- a/tensorflow/contrib/cloud/ops/gcs_config_ops.cc +++ b/tensorflow/contrib/cloud/ops/gcs_config_ops.cc @@ -21,50 +21,12 @@ namespace tensorflow { REGISTER_OP("GcsConfigureCredentials") .Input("json: string") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Configures the credentials used by the GCS client of the local TF runtime. - -The json input can be of the format: - -1. Refresh Token: -{ - "client_id": "", - "client_secret": "", - "refresh_token: "", - "type": "authorized_user", -} - -2. Service Account: -{ - "type": "service_account", - "project_id": "", - "private_key_id": "", - "private_key": "------BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY------\n", - "client_email": "@.iam.gserviceaccount.com", - "client_id": "", - # Some additional fields elided -} - -Note the credentials established through this method are shared across all -sessions run on this runtime. - -Note be sure to feed the inputs to this op to ensure the credentials are not -stored in a constant op within the graph that might accidentally be checkpointed -or in other ways be persisted or exfiltrated. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); REGISTER_OP("GcsConfigureBlockCache") .Input("max_cache_size: uint64") .Input("block_size: uint64") .Input("max_staleness: uint64") - .SetShapeFn(shape_inference::NoOutputs) - .Doc(R"doc( -Re-configures the GCS block cache with the new configuration values. 
- -If the values are the same as already configured values, this op is a no-op. If -they are different, the current contents of the block cache is dropped, and a -new block cache is created fresh. -)doc"); + .SetShapeFn(shape_inference::NoOutputs); } // namespace tensorflow diff --git a/tensorflow/core/api_def/base_api/api_def_GcsConfigureBlockCache.pbtxt b/tensorflow/core/api_def/base_api/api_def_GcsConfigureBlockCache.pbtxt new file mode 100644 index 0000000000..9d32940c64 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_GcsConfigureBlockCache.pbtxt @@ -0,0 +1,9 @@ +op { + graph_op_name: "GcsConfigureBlockCache" + summary: "Re-configures the GCS block cache with the new configuration values." + description: <", + "client_secret": "", + "refresh_token: "", + "type": "authorized_user", +} + +2. Service Account: +{ + "type": "service_account", + "project_id": "", + "private_key_id": "", + "private_key": "------BEGIN PRIVATE KEY-----\n\n-----END PRIVATE KEY------\n", + "client_email": "@.iam.gserviceaccount.com", + "client_id": "", + # Some additional fields elided +} + +Note the credentials established through this method are shared across all +sessions run on this runtime. + +Note be sure to feed the inputs to this op to ensure the credentials are not +stored in a constant op within the graph that might accidentally be checkpointed +or in other ways be persisted or exfiltrated. +END0 +} -- GitLab From ea3bdbc7ea72e488566326aeb446681a557f4334 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Thu, 14 Jun 2018 06:17:00 -0700 Subject: [PATCH 0066/2038] Update version strings for 1.9.0-rc1. 
--- tensorflow/core/public/version.h | 2 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 18 +++++++-------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 4 ++-- tensorflow/tools/pip_package/setup.py | 2 +- 8 files changed, 31 insertions(+), 31 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index cb1fd09dbb..9e5e747557 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc0" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 2901848745..2f81ae0c40 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.9.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 55bc0f64e7..1c03dd223e 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU 
support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.9.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.9.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index b3b739212e..c73e2f4281 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.9.0-rc0 + 1.9.0-rc1 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.9.0-rc0 + 1.9.0-rc1 @@ -124,12 +124,12 @@ instead: org.tensorflow libtensorflow - 1.9.0-rc0 + 1.9.0-rc1 org.tensorflow libtensorflow_jni_gpu - 1.9.0-rc0 + 1.9.0-rc1 ``` @@ -148,7 +148,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. 
Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -167,7 +167,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.9.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.9.0-rc1.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -175,10 +175,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.9.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.9.0-rc0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.9.0-rc1.zip). 3. Extract this .zip file. @@ -227,7 +227,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.9.0-rc0.jar HelloTF.java
+
javac -cp libtensorflow-1.9.0-rc1.jar HelloTF.java
### Running @@ -241,11 +241,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.9.0-rc0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.9.0-rc1.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.9.0-rc0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.9.0-rc1.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 2ecab808c4..9baf6870be 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -438,7 +438,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc1-cp34-cp34m-linux_x86_64.whl ## Validate your installation @@ -684,14 +684,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc1-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc1-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -703,14 +703,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc1-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc1-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -722,14 +722,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc1-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc1-cp35-cp35m-linux_x86_64.whl
 
@@ -741,14 +741,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.9.0rc1-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.9.0rc1-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 9d01271c5a..693254f876 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc1-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc1-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -350,7 +350,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc1-py2-none-any.whl @@ -522,7 +522,7 @@ The value you specify depends on your Python version.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc1-py2-none-any.whl
 
@@ -530,5 +530,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.9.0rc1-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index d25e641cee..70e97cf556 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -328,10 +328,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.9.0rc0 on Linux: +for TensorFlow 1.9.0rc1 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.9.0rc0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.9.0rc1-py2-none-any.whl
 
## Validate your installation diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 92a1465cea..eb2e359ee5 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.9.0-rc0' +_VERSION = '1.9.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From 5c6aff23fa126f530f1bcfd67eeecf15926c5995 Mon Sep 17 00:00:00 2001 From: Surry Shome Date: Wed, 13 Jun 2018 23:11:09 -0400 Subject: [PATCH 0067/2038] Avoid creating a new std::stringstream and copying data into it every time S3RandomAccessFile::Read() is called. Fixes #14572. --- tensorflow/core/platform/s3/s3_file_system.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/core/platform/s3/s3_file_system.cc b/tensorflow/core/platform/s3/s3_file_system.cc index 6da679dc75..917ac69c9e 100644 --- a/tensorflow/core/platform/s3/s3_file_system.cc +++ b/tensorflow/core/platform/s3/s3_file_system.cc @@ -187,9 +187,7 @@ class S3RandomAccessFile : public RandomAccessFile { return Status(error::OUT_OF_RANGE, "Read less bytes than requested"); } n = getObjectOutcome.GetResult().GetContentLength(); - std::stringstream ss; - ss << getObjectOutcome.GetResult().GetBody().rdbuf(); - ss.read(scratch, n); + getObjectOutcome.GetResult().GetBody().read(scratch, n); *result = StringPiece(scratch, n); return Status::OK(); -- GitLab From b2a12d441d556921ac06d00a356bdc5a34c02f11 Mon Sep 17 00:00:00 2001 From: xxxx001 Date: Mon, 18 Jun 2018 03:04:54 -0700 Subject: [PATCH 0068/2038] invalid eager env_ or env_->rendezvous_mgr. 
--- .../core/distributed_runtime/eager/eager_service_impl.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc index 4bd74b81a7..94f39095e1 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc @@ -80,6 +80,9 @@ Status GetNumRetvals(tensorflow::EagerContext* context, const string& op_name, Status EagerServiceImpl::CreateContext(const CreateContextRequest* request, CreateContextResponse* response) { + if (env_ == nullptr || env_->rendezvous_mgr == nullptr) { + return errors::InvalidArgument("invalid eager env_ or env_->rendezvous_mgr."); + } tensorflow::RemoteRendezvous* r = env_->rendezvous_mgr->Find(0); std::vector devices; TF_RETURN_IF_ERROR(tensorflow::DeviceFactory::AddDevices( -- GitLab From f5ee4df50af4041dc0063d0adc31c7a6eebdbcd3 Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Fri, 8 Jun 2018 15:47:19 -0700 Subject: [PATCH 0069/2038] Copy edits to Keras guide, formatting, moving some things around. Make the right TOC nav more useful. PiperOrigin-RevId: 199863216 --- .../docs_src/programmers_guide/keras.md | 870 ++++++++---------- 1 file changed, 389 insertions(+), 481 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/keras.md b/tensorflow/docs_src/programmers_guide/keras.md index 6a9df12a25..c6aca7ebf4 100644 --- a/tensorflow/docs_src/programmers_guide/keras.md +++ b/tensorflow/docs_src/programmers_guide/keras.md @@ -1,334 +1,304 @@ # Keras -## What's Keras? - -Keras is a high-level API specification for building and training deep learning -models, suitable for fast prototyping, advanced research, and production. 
-It offers three key advantages: - -- **User friendliness.** Keras follows best practices for reducing - cognitive load: it offers consistent & simple interfaces, - it minimizes the number of user actions required for common use cases, - and it provides clear and actionable feedback upon user error. -- **Modularity and composability.** A Keras model is composed of - fully-configurable building blocks that can be plugged together - with as few restrictions as possible -- like Lego bricks. -- **Easy extensibility.** You can easily write your own building blocks - (such as new layers, new loss functions, new models where you write - the forward pass from scratch). This allows for total expressiveness, - making Keras suitable for advanced research. - - -## What's tf.keras? - -`tf.keras` is TensorFlow's implementation of the Keras API specification, that -serves as the TensorFlow high-level API: it's how you build models in TensorFlow. -`tf.keras` seamlessly integrates with the rest of the TensorFlow API -(such as `tf.data` input pipelines), bringing you the full power and flexibility -of TensorFlow through an easy-to-use interface. - -You can import `tf.keras` via: +Keras is a high-level API to build and train deep learning models. It's used for +fast prototyping, advanced research, and production, with three key advantages: + +- *User friendly*
+ Keras has a simple, consistent interface optimized for common use cases. It + provides clear and actionable feedback for user errors. +- *Modular and composable*
+ Keras models are made by connecting configurable building blocks together, + with few restrictions. +- *Easy to extend*
Write custom building blocks to express new ideas for + research. Create new layers, loss functions, and develop state-of-the-art + models. + +## Import tf.keras + +`tf.keras` is TensorFlow's implementation of the +[Keras API specification](https://keras.io){:.external}. This is a high-level +API to build and train models that includes first-class support for +TensorFlow-specific functionality, such as [eager execution](#eager_execution), +`tf.data` pipelines, and [Estimators](/programmers_guide/estimators). +`tf.keras` makes TensorFlow easier to use without sacrificing flexibility and +performance. + +To get started, import `tf.keras` as part of your TensorFlow program setup: ```python +import tensorflow as tf from tensorflow import keras ``` -What follows is a quick introduction to the basics of `tf.keras`. +`tf.keras` can run any Keras-compatible code, but keep in mind: +* The `tf.keras` version in the latest TensorFlow release might not be the same + as the latest `keras` version from PyPI. Check `tf.keras.__version__`. +* When [saving a model's weights](#weights_only), `tf.keras` defaults to the + [checkpoint format](/get_started/checkpoints). Pass `save_format='h5'` to use + HDF5. 
-## Table of contents +## Build a simple model -- [Getting started: the Sequential model](#getting-started-the-sequential-model) -- [Configuring layers](#configuring-layers) -- [Configuring training](#configuring-training) -- [Training and evaluation](#training-and-evaluation) -- [Building advanced models: the functional API](#building-advanced-models-the-functional-api) -- [Building fully-customizable research models: the Model subclassing API](#building-fully-customizable-research-models-the-model-subclassing-api) -- [Callbacks](#callbacks) -- [Saving and serialization](#saving-and-serialization) -- [Developing custom layers](#developing-custom-layers) -- [Eager execution](#eager-execution) -- [Further reading](#further-reading) -- [FAQ](#faq) +### Sequential model +In Keras, you assemble *layers* to build *models*. A model is (usually) a graph +of layers. The most common type of model is a stack of layers: the +`tf.keras.Sequential` model. ---- - -## Getting started: the Sequential model - -In `tf.keras`, you're assembling together **layers** to build **models**. -A model is generally a graph of layers. -The most common type of model is just a stack of layers: the `Sequential` class. - -Here's how to build a simple fully-connected network (multi-layer perceptron): +To build a simple, fully-connected network (i.e. 
multi-layer perceptron): ```python -from tensorflow import keras -from tensorflow.keras import layers - model = keras.Sequential() -# This adds to the model a densely-connected layer with 64 units: -model.add(Dense(64, activation='relu')) -# Another one: -model.add(Dense(64, activation='relu')) -# This adds a softmax layer with 10 output units: -model.add(Dense(10, activation='softmax')) +# Adds a densely-connected layer with 64 units to the model: +model.add(keras.layers.Dense(64, activation='relu')) +# Add another: +model.add(keras.layers.Dense(64, activation='relu')) +# Add a softmax layer with 10 output units: +model.add(keras.layers.Dense(10, activation='softmax')) ``` ---- - -## Configuring layers - -Each layer may have unique constructor arguments, but some common arguments include: +### Configure the layers -- `activation`: the activation function to be used. - It could be specified by name, as a string (for built-in functions) - or as a callable object. By default, no activation is applied. -- `kernel_initializer` and `bias_initializer`: the initialization schemes to use - to create the layer's weights (kernel and bias). - Likewise, they may be passed either by name or by specifying a callable. - By default, the "Glorot uniform" initializer is used. -- `kernel_regularizer` and `bias_regularizer`: the regularization schemes to - apply to the layer's weights (kernel and bias), such as L1 - or L2 regularization. By default, no regularization is applied. +There are many `tf.keras.layers` available with some common constructor +parameters: +* `activation`: Set the activation function for the layer. This parameter is + specified by the name of a built-in function or as a callable object. By + default, no activation is applied. +* `kernel_initializer` and `bias_initializer`: The initialization schemes + that create the layer's weights (kernel and bias). This parameter is a name or + a callable object. This defaults to the `"Glorot uniform"` initializer. 
+* `kernel_regularizer` and `bias_regularizer`: The regularization schemes + that apply to the layer's weights (kernel and bias), such as L1 or L2 + regularization. By default, no regularization is applied. -### Examples +The following instantiates `tf.keras.layers.Dense` layers using constructor +arguments: ```python -import tensorflow as tf -from tensorflow.keras.layers import Dense -from tensorflow.keras import regularizers -from tensorflow.keras import initializers - -# A sigmoid layer: -Dense(64, activation='sigmoid') -# Another way to define the same sigmoid layer: -Dense(64, activation=tf.sigmoid) - -# A linear layer with L1 regularization of factor 0.01 -# applied to the kernel matrix: -Dense(64, kernel_regularizer=regularizers.l1(0.01)) -# A linear layer with L2 regularization of factor 0.01 -# applied to the bias vector: -Dense(64, bias_regularizer=regularizers.l2(0.01)) +# Create a sigmoid layer: +keras.layers.Dense(64, activation='sigmoid') +# Or: +keras.layers.Dense(64, activation=tf.sigmoid) + +# A linear layer with L1 regularization of factor 0.01 applied to the kernel matrix: +keras.layers.Dense(64, kernel_regularizer=keras.regularizers.l1(0.01)) +# A linear layer with L2 regularization of factor 0.01 applied to the bias vector: +keras.layers.Dense(64, bias_regularizer=keras.regularizers.l2(0.01)) # A linear layer with a kernel initialized to a random orthogonal matrix: -Dense(64, kernel_initializer='orthogonal') +keras.layers.Dense(64, kernel_initializer='orthogonal') # A linear layer with a bias vector initialized to 2.0s: -Dense(64, bias_initializer=initializers.constant(2.0)) +keras.layers.Dense(64, bias_initializer=keras.initializers.constant(2.0)) ``` ---- +## Train and evaluate -## Configuring training +### Set up training -Once your model looks good, configure its learning process by calling `compile`: +After the model is constructed, configure its learning process by calling the +`compile` method: ```python -import tensorflow as tf -
model.compile(optimizer=tf.train.AdamOptimizer(0.001), loss='categorical_crossentropy', metrics=['accuracy']) ``` -There are three key arguments that you need to specify: +`tf.keras.Model.compile` takes three important arguments: -- An `optimizer`: this object specifies the training procedure. - We recommend that you pass instances of optimizers from the `tf.train` module - (such as [`AdamOptimizer`](https://www.tensorflow.org/api_docs/python/tf/train/AdamOptimizer), - [`RMSPropOptimizer`](https://www.tensorflow.org/api_docs/python/tf/train/RMSPropOptimizer), - or [`GradientDescentOptimizer`](https://www.tensorflow.org/api_docs/python/tf/train/GradientDescentOptimizer)). -- A `loss` function to minimize: this specifies the optimization objective. - Common choices include mean square error (`mse`), `categorical_crossentropy` - and `binary_crossentropy`. Loss functions may be specified by name - or by passing a callable (e.g. from the `tf.keras.losses` module). -- Some `metrics` to monitor during training: again, you can pass these as either - string names or callables (e.g. from the `tf.keras.metrics` module). +* `optimizer`: This object specifies the training procedure. Pass it optimizer + instances from the `tf.train` module, such as + [`AdamOptimizer`](/api_docs/python/tf/train/AdamOptimizer), + [`RMSPropOptimizer`](/api_docs/python/tf/train/RMSPropOptimizer), or + [`GradientDescentOptimizer`](/api_docs/python/tf/train/GradientDescentOptimizer). +* `loss`: The function to minimize during optimization. Common choices include + mean square error (`mse`), `categorical_crossentropy`, and + `binary_crossentropy`. Loss functions are specified by name or by + passing a callable object from the `tf.keras.losses` module. +* `metrics`: Used to monitor training. These are string names or callables from + the `tf.keras.metrics` module. 
- -### Examples +The following shows a few examples of configuring a model for training: ```python -# Configures a model to do mean-squared error regression. +# Configure a model for mean-squared error regression. model.compile(optimizer=tf.train.AdamOptimizer(0.01), - loss='mse', # mean squared error + loss='mse', # mean squared error metrics=['mae']) # mean absolute error -``` -```python -# Configures a model to do categorical classification. + +# Configure a model for categorical classification. model.compile(optimizer=tf.train.RMSPropOptimizer(0.01), - loss=tf.keras.losses.categorical_crossentropy, - metrics=[tf.keras.metrics.categorical_accuracy]) + loss=keras.losses.categorical_crossentropy, + metrics=[keras.metrics.categorical_accuracy]) ``` ---- - -## Training and evaluation +### Input NumPy data -### From Numpy data - -When running locally on small datasets, the easiest way to do training and -evaluation is to pass data to your model as Numpy arrays of inputs and targets. -You can "fit" your model to some training data using the `model.fit()` method: +For small datasets, use in-memory [NumPy](https://www.numpy.org/){:.external} +arrays to train and evaluate a model. The model is "fit" to the training data +using the `fit` method: ```python import numpy as np -data = np.random.random(shape=(1000, 32)) -targets = np.random.random(shape=(1000, 10)) +data = np.random.random((1000, 32)) +labels = np.random.random((1000, 10)) -model.fit(data, targets, epochs=10, batch_size=32) +model.fit(data, labels, epochs=10, batch_size=32) ``` -Here are some key arguments you can pass to the `fit` method: - -- `epochs`: Training is structured into **epochs**. An epoch is one iteration - over the entire input data (which is done in smaller batches). -- `batch_size`: when passing Numpy data, the model will slice the data into - smaller batches and iterate over these batches during training. 
- This integer specifies the size of each batch - (the last batch may be smaller if the total number of samples is not - divisible by the batch size). -- `validation_data`: when prototyping a model, you want to be able to quickly - monitor its performance on some validation data. - When you pass this argument (it expects a tuple of inputs and targets), - the model will display the loss and metrics in inference mode on the data - you passed, at the end of each epoch. +`tf.keras.Model.fit` takes three important arguments: + +* `epochs`: Training is structured into *epochs*. An epoch is one iteration over + the entire input data (this is done in smaller batches). +* `batch_size`: When passed NumPy data, the model slices the data into smaller + batches and iterates over these batches during training. This integer + specifies the size of each batch. Be aware that the last batch may be smaller + if the total number of samples is not divisible by the batch size. +* `validation_data`: When prototyping a model, you want to easily monitor its + performance on some validation data. Passing this argument—a tuple of inputs + and labels—allows the model to display the loss and metrics in inference mode + for the passed data, at the end of each epoch. 
Here's an example using `validation_data`: ```python import numpy as np -data = np.random.random(shape=(1000, 32)) -targets = np.random.random(shape=(1000, 10)) +data = np.random.random((1000, 32)) +labels = np.random.random((1000, 10)) -val_data = np.random.random(shape=(100, 32)) -val_targets = np.random.random(shape=(100, 10)) +val_data = np.random.random((100, 32)) +val_labels = np.random.random((100, 10)) -model.fit(data, targets, epochs=10, batch_size=32, - validation_data=(val_data, val_targets)) +model.fit(data, labels, epochs=10, batch_size=32, + validation_data=(val_data, val_labels)) ``` -### From tf.data datasets +### Input tf.data datasets -When you need to scale to large datasets or multi-device training, -training from Numpy arrays in memory will not be ideal. -In such cases, you should use [the `tf.data` API](https://www.tensorflow.org/programmers_guide/datasets). -You can pass a `tf.data.Dataset` instance to the `fit` method: +Use the [Datasets API](/programmers_guide/datasets) to scale to large datasets +or multi-device training. Pass a `tf.data.Dataset` instance to the `fit` +method: ```python -import tensorflow as tf - # Instantiates a toy dataset instance: -dataset = tf.data.Dataset.from_tensor_slices((data, targets)).batch(32) +dataset = tf.data.Dataset.from_tensor_slices((data, labels)) +dataset = dataset.batch(32) +dataset = dataset.repeat() # Don't forget to specify `steps_per_epoch` when calling `fit` on a dataset. model.fit(dataset, epochs=10, steps_per_epoch=30) ``` -When doing so, the dataset itself will yield batches of data, -so the model does not need to be passed `batch_size` information. -Instead, the model needs to know for how many steps (or batches of data) -it should run at each epoch. -You specify this with the `steps_per_epoch` argument: it's the number of -training steps the model will run before moving on the next epoch. 
+Here, the `fit` method uses the `steps_per_epoch` argument—this is the number of +training steps the model runs before it moves to the next epoch. Since the +`Dataset` yields batches of data, this snippet does not require a `batch_size`. -You can also pass datasets for validation: +Datasets can also be used for validation: ```python -dataset = tf.data.Dataset.from_tensor_slices((data, targets)).batch(32) -val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_targets)).batch(32) +dataset = tf.data.Dataset.from_tensor_slices((data, labels)) +dataset = dataset.batch(32).repeat() -model.fit(dataset, epochs=10, steps_per_epoch=30, validation_data=val_dataset, validation_steps=3) +val_dataset = tf.data.Dataset.from_tensor_slices((val_data, val_labels)) +val_dataset = val_dataset.batch(32).repeat() + +model.fit(dataset, epochs=10, steps_per_epoch=30, + validation_data=val_dataset, + validation_steps=3) ``` ### Evaluate and predict -In addition, you get access to the following methods -(both with Numpy data and dataset instances): +The `tf.keras.Model.evaluate` and `tf.keras.Model.predict` methods can use NumPy +data and a `tf.data.Dataset`. -- `model.evaluate(x, y, batch_size=32)` or `model.evaluate(dataset, steps=30)` - will return the inference-mode loss and metrics for the data provided. -- `model.predict(x, y, batch_size=32)` or `model.predict(dataset, steps=30)` - will return the output(s) of the last layer(s) in inference on the data - provided, as Numpy array(s). +To *evaluate* the inference-mode loss and metrics for the data provided: ---- +```python +model.evaluate(x, y, batch_size=32) -## Building advanced models: the functional API +model.evaluate(dataset, steps=30) +``` -The `Sequential` model cannot represent arbitrary models -- only simple stacks -of layers.
If you need to use more complex model topologies, -such as multi-input models, multi-output models, -models with a same layer called several times (shared layers), -or models with non-sequential data flows (e.g. residual connections), -you can use the 'functional API'. +And to *predict* the output of the last layer in inference for the data provided, +as a NumPy array: -Here's how it works: +```python +model.predict(x, batch_size=32) -- A layer instance is callable (on a tensor), and it returns a tensor. -- Input tensor(s) and output tensor(s) can then be used to define a `Model` instance. -- Such a model can be trained just like the `Sequential` model. +model.predict(dataset, steps=30) +``` -Here's a basic example showing the same model we previously defined, -built using the functional API: +## Build advanced models -```python -from tensorflow import keras -from tensorflow.keras import layers +### Functional API -# This returns a placeholder tensor: -inputs = keras.Input(shape=(784,)) +The `tf.keras.Sequential` model is a simple stack of layers that cannot +represent arbitrary models. Use the +[Keras functional API](https://keras.io/getting-started/functional-api-guide/){:.external} +to build complex model topologies such as: + +* Multi-input models, +* Multi-output models, +* Models with shared layers (the same layer called several times), +* Models with non-sequential data flows (e.g. residual connections). + +Building a model with the functional API works like this: + +1. A layer instance is callable and returns a tensor. +2. Input tensors and output tensors are used to define a `tf.keras.Model` + instance. +3. This model is trained just like the `Sequential` model. + +The following example uses the functional API to build a simple, fully-connected +network: + +```python +inputs = keras.Input(shape=(32,)) # Returns a placeholder tensor # A layer instance is callable on a tensor, and returns a tensor.
-x = layers.Dense(64, activation='relu')(inputs) -x = layers.Dense(64, activation='relu')(x) -predictions = layers.Dense(10, activation='softmax')(x) +x = keras.layers.Dense(64, activation='relu')(inputs) +x = keras.layers.Dense(64, activation='relu')(x) +predictions = keras.layers.Dense(10, activation='softmax')(x) -# Instantiates the model given inputs and outputs. +# Instantiate the model given inputs and outputs. model = keras.Model(inputs=inputs, outputs=predictions) -# The "compile" step specifies the training configuration. -model.compile(optimizer='rmsprop', +# The compile step specifies the training configuration. +model.compile(optimizer=tf.train.RMSPropOptimizer(0.001), loss='categorical_crossentropy', metrics=['accuracy']) -# Trains for 5 epochs. +# Trains for 5 epochs model.fit(data, labels, batch_size=32, epochs=5) ``` -This API enables you to create models with multiple inputs and outputs, -and to "share" layers across different inputs -(i.e. to reuse a same instance multiple times). -For examples of these use cases, -please see [this guide to the functional API in Keras](https://keras.io/getting-started/functional-api-guide/). +### Model subclassing ---- +Build a fully-customizable model by subclassing `tf.keras.Model` and defining +your own forward pass. Create layers in the `__init__` method and set them as +attributes of the class instance. Define the forward pass in the `call` method. -## Building fully-customizable research models: the Model subclassing API +Model subclassing is particularly useful when +[eager execution](/programmers_guide/eager) is enabled since the forward pass +can be written imperatively. -Besides `Sequential` and the functional API, one last, more flexible way to -define models is to directly subclass the `Model` class and define your own -forward pass manually. +Key Point: Use the right API for the job. 
While model subclassing offers +flexibility, it comes at a cost of greater complexity and more opportunities for +user errors. If possible, prefer the functional API. -In this API, you instante layers in `__init__` and set them as attribute of the -class instance. Then you specify the forward pass in `call`. -This API is particularly valuable when using TensorFlow with [eager execution](https://www.tensorflow.org/programmers_guide/eager), -since eager execution allows you to write your forward pass in an -imperative fashion (as if you were writing Numpy code, for instance). +The following example shows a subclassed `tf.keras.Model` using a custom forward +pass: ```python -import tensorflow as tf -from tensorflow import keras - - class MyModel(keras.Model): - def __init__(self, num_classes=2): + def __init__(self, num_classes=10): super(MyModel, self).__init__(name='my_model') self.num_classes = num_classes # Define your layers here. @@ -351,10 +321,10 @@ class MyModel(keras.Model): # Instantiates the subclassed model. -model = MyModel(num_classes=2) +model = MyModel(num_classes=10) -# The "compile" step specifies the training configuration. -model.compile(optimizer='rmsprop', +# The compile step specifies the training configuration. +model.compile(optimizer=tf.train.RMSPropOptimizer(0.001), loss='categorical_crossentropy', metrics=['accuracy']) @@ -362,353 +332,291 @@ model.compile(optimizer='rmsprop', model.fit(data, labels, batch_size=32, epochs=5) ``` -**Remember:** use the right API for the right job. -Using the `Model` subclassing API offers more flexibility, -but at the cost of greater complexity and a larger potential user error surface. -Prefer using the functional API when possible. ---- +### Custom layers -## Callbacks +Create a custom layer by subclassing `tf.keras.layers.Layer` and implementing +the following methods: -Callbacks are objects that you can pass to your model that customize and extend -its behavior during training. 
-There are callbacks for saving checkpoints of your model at regular intervals -(`tf.keras.callbacks.ModelCheckpoint`), -to dynamically change the learning rate (`tf.keras.callbacks.LearningRateScheduler`) -or to interrupt training when validation performance has stopped improving -(`tf.keras.callbacks.EarlyStopping`). -You can also use a callback to monitor your model's behavior using -[TensorBoard](https://www.tensorflow.org/programmers_guide/summaries_and_tensorboard) -(`tf.keras.callbacks.TensorBoard`). -You can also write your own custom callbacks. - -Different built-in callback are found in `tf.keras.callbacks`. -You use them by passing a `Callback` instance to `fit`: +* `build`: Create the weights of the layer. Add weights with the `add_weight` + method. +* `call`: Define the forward pass. +* `compute_output_shape`: Specify how to compute the output shape of the layer + given the input shape. +* Optionally, a layer can be serialized by implementing the `get_config` method + and the `from_config` class method. + +Here's an example of a custom layer that implements a `matmul` of an input with +a kernel matrix: ```python -from tensorflow import keras +class MyLayer(keras.layers.Layer): + + def __init__(self, output_dim, **kwargs): + self.output_dim = output_dim + super(MyLayer, self).__init__(**kwargs) + + def build(self, input_shape): + shape = tf.TensorShape((input_shape[1], self.output_dim)) + # Create a trainable weight variable for this layer. 
+ self.kernel = self.add_weight(name='kernel', + shape=shape, + initializer='uniform', + trainable=True) + # Be sure to call this at the end + super(MyLayer, self).build(input_shape) -callbacks = [ - # Interrupt training if `val_loss` stops improving for over 2 epochs - keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'), - # Write TensorBoard logs to `./logs` directory - keras.callbacks.TensorBoard(log_dir='./logs') -] -model.fit(data, labels, batch_size=32, epochs=5, callbacks=callbacks) -``` + def call(self, inputs): + return tf.matmul(inputs, self.kernel) ---- + def compute_output_shape(self, input_shape): + shape = tf.TensorShape(input_shape).as_list() + shape[-1] = self.output_dim + return tf.TensorShape(shape) -## Saving and serialization + def get_config(self): + base_config = super(MyLayer, self).get_config() + base_config['output_dim'] = self.output_dim -### Weights-only saving + @classmethod + def from_config(cls, config): + return cls(**config) -You can save the weight values of a model via `model.save_weights(filepath)`: -```python -# Saves weights to a SavedModel file. -model.save_weights('my_model') +# Create a model using the custom layer +model = keras.Sequential([MyLayer(10), + keras.layers.Activation('softmax')]) -# Restores the model's state -# (this requires a model that has the same architecture). -model.load_weights('my_model') +# The compile step specifies the training configuration +model.compile(optimizer=tf.train.RMSPropOptimizer(0.001), + loss='categorical_crossentropy', + metrics=['accuracy']) + +# Trains for 5 epochs. +model.fit(data, targets, batch_size=32, epochs=5) ``` -By default, this saves the weight in the TensorFlow -[`SavedModel`](https://www.tensorflow.org/programmers_guide/saved_model) format. -You could also save them in the Keras HDF5 format -(which is the default in the multi-backend implementation of Keras): -```python -# Saves weights to a HDF5 file. 
-model.save_weights('my_model.h5', format='h5') +## Callbacks -# Restores the model's state. -model.load_weights('my_model.h5') -``` +A callback is an object passed to a model to customize and extend its behavior +during training. You can write your own custom callback, or use the built-in +`tf.keras.callbacks` that include: -### Configuration-only saving (serialization) +* `tf.keras.callbacks.ModelCheckpoint`: Save checkpoints of your model at + regular intervals. +* `tf.keras.callbacks.LearningRateScheduler`: Dynamically change the learning + rate. +* `tf.keras.callbacks.EarlyStopping`: Interrupt training when validation + performance has stopped improving. +* `tf.keras.callbacks.TensorBoard`: Monitor the model's behavior using + [TensorBoard](/programmers_guide/summaries_and_tensorboard). -You can also save the model's configuration -(its architecture, without any weight values), -which allows you to recreate the same model later (freshly initialized) even if -you don't have the code that defined it anymore. -Two possible serialization formats are JSON and YAML: +To use a `tf.keras.callbacks.Callback`, pass it to the model's `fit` method: ```python -from tensorflow.keras import models - -# Serializes a model to JSON. -json_string = model.to_json() -# Recreates the model (freshly initialized). -fresh_model = models.from_json(json_string) - -# Serializes a model to YAML. -yaml_string = model.to_yaml() -# Recreates the model. 
-fresh_model = models.from_yaml(yaml_string) +callbacks = [ + # Interrupt training if `val_loss` stops improving for over 2 epochs + keras.callbacks.EarlyStopping(patience=2, monitor='val_loss'), + # Write TensorBoard logs to `./logs` directory + keras.callbacks.TensorBoard(log_dir='./logs') +] +model.fit(data, labels, batch_size=32, epochs=5, callbacks=callbacks, + validation_data=(val_data, val_targets)) ``` -Note that this feature is not available with subclassed models, -because they are simply not serializable: -their architecture is defined as Python code -(the body of the `call` method of the model). -### Whole-model saving +## Save and restore -Finally, you can also save a model wholesale, to a file that will contain both -the weight values, the model's configuration, -and even the optimizer's configuration. -The allows you to checkpoint a model and resume training later -- -from the exact same state -- even if you don't have access to the original code. +### Weights only -```python -from tensorflow.keras import models +Save and load the weights of a model using `tf.keras.Model.save_weights`: -model.save('my_model.h5') +```python +# Save weights to a TensorFlow Checkpoint file +model.save_weights('./my_model') -# Recreates the exact same model, complete with weights and optimizer. -model = models.load_model('my_model.h5') +# Restore the model's state, +# this requires a model with the same architecture. +model.load_weights('my_model') ``` ---- - -## Developing custom layers - -You can write your own custom layers by subclassing the class -`tf.keras.layers.Layer`. You will need to implement the following three methods: - -- `build`: Creates the weights of the layer. - Weights should be added via the `add_weight` method. -- `call`: Specifies the forward pass. -- `compute_output_shape`: Specifies how to compute the output shape of the layer - given the input shape. 
- -Optionally, you may also implement the method `get_config()` and the -class method `from_config()` if you want your layer to be serializable. - -Here's a simple example of a custom layer that implements a `matmul` -of an input with a kernel matrix: +By default, this saves the model's weights in the +[TensorFlow checkpoint](/get_started/checkpoints) file format. Weights can also +be saved to the Keras HDF5 format (the default for the multi-backend +implementation of Keras): ```python -import tensorflow as tf -from tensorflow.keras import layers - -class MyLayer(layers.Layer): - - def __init__(self, output_dim, **kwargs): - self.output_dim = output_dim - super(MyLayer, self).__init__(**kwargs) - - def build(self, input_shape): - # Create a trainable weight variable for this layer. - self.kernel = self.add_weight(name='kernel', - shape=(input_shape[1], self.output_dim), - initializer='uniform', - trainable=True) - # Be sure to call this at the end - super(MyLayer, self).build(input_shape) - - def call(self, inputs): - return tf.matmul(inputs, self.kernel) - - def compute_output_shape(self, input_shape): - shape = tf.TensorShape(input_shape).as_list() - shape[-1] = self.output_dim - return tf.TensorShape(shape) - - def get_config(self): - base_config = super(MyLayer, self).get_config() - base_config['output_dim'] = self.output_dim - - @classmethod - def from_config(cls, config): - return cls(**config) -``` +# Save weights to a HDF5 file +model.save_weights('my_model.h5', save_format='h5') ---- - -## Eager execution +# Restore the model's state +model.load_weights('my_model.h5') +``` -[Eager execution](https://www.tensorflow.org/programmers_guide/eager) -is a way to write TensorFlow code imperatively. -All three `tf.keras` model-building APIs -(`Sequential`, the functional API `Model(inputs, outputs)`, -and the subclassing API `MyModel(Model)`) are compatible with eager execution. 
-When using `Sequential` or the functional API, it makes no difference to the -user experience whether the model is executing eagerly or not. -Eager execution is most beneficial when used with the `Model` subclassing API, -or when prototyping a custom layer -- that is to say, in APIs that require you -to *write a forward pass as code*, rather than in APIs that allow you to create -models by assembling together existing layers. +### Configuration only -While the same training and evaluating APIs presented in this guide work -as usual with eager execution, you can in addition -write custom training loops using the eager `GradientTape` -and define-by-run autodifferentiation: +A model's configuration can be saved—this serializes the model architecture +without any weights. A saved configuration can recreate and initialize the same +model, even without the code that defined the original model. Keras supports +JSON and YAML serialization formats: ```python -import tensorflow as tf -from tensorflow.contrib import eager as tfe - -# This call begins the eager execution session. -tf.enable_eager_execution() - -model = ... # Defines a Keras model (we recommend Model subclassing in this case). -dataset = ... # Defines a `tf.data` dataset. +# Serialize a model to JSON format +json_string = model.to_json() -optimizer = tf.train.AdamOptimizer(0.01) +# Recreate the model (freshly initialized) +fresh_model = keras.models.from_json(json_string) -for data, labels in dataset: - # Runs the forward pass and loss computation under a `GradientTape` scope, - # which will record all operations in order to prepare for the backward pass. - with tfe.GradientTape() as tape: - predictions = model(data) - loss = loss_function(labels, predictions) +# Serializes a model to YAML format +yaml_string = model.to_yaml() - # Runs the backward pass manually using the operations recorded - # by the gradient tape. 
- grads = tape.gradient(loss, model.trainable_weights) - optimizer.apply_gradients(zip(grads, model.trainable_weights), - global_step=tf.train.get_or_create_global_step()) +# Recreate the model +fresh_model = keras.models.from_yaml(yaml_string) ``` ---- +Caution: Subclassed models are not serializable because their architecture is +defined by the Python code in the body of the `call` method. -## Further reading -### Documentation +### Entire model -- [tf.keras documentation](https://www.tensorflow.org/api_docs/python/tf/keras) -- [keras.io](https://keras.io/) +The entire model can be saved to a file that contains the weight values, the +model's configuration, and even the optimizer's configuration. This allows you +to checkpoint a model and resume training later—from the exact same +state—without access to the original code. -### tf.keras tutorials and examples - -- [Fashion-MNIST with tf.Keras](https://medium.com/tensorflow/hello-deep-learning-fashion-mnist-with-keras-50fcff8cd74a) -- [Predicting the price of wine with the Keras Functional API and TensorFlow]( - https://medium.com/tensorflow/predicting-the-price-of-wine-with-the-keras-functional-api-and-tensorflow-a95d1c2c1b03) +```python +# Create a trivial model +model = keras.Sequential([ + keras.layers.Dense(10, activation='softmax', input_shape=(32,)), + keras.layers.Dense(10, activation='softmax') +]) +model.compile(optimizer='rmsprop', + loss='categorical_crossentropy', + metrics=['accuracy']) +model.fit(data, targets, batch_size=32, epochs=5) ---- +# Save entire model to a HDF5 file +model.save('my_model.h5') -## FAQ +# Recreate the exact same model, including weights and optimizer. +model = keras.models.load_model('my_model.h5') +``` -### What are the differences between tf.keras and the multi-backend Keras implementation? 
-`tf.keras` includes first-class support for important TensorFlow-specific -functionality not found in other Keras implementations, in particular: +## Eager execution -- Support for eager execution. -- Support for the `tf.data` API. -- Integration with the - [`tf.estimator` API](https://www.tensorflow.org/programmers_guide/estimators), - via `tf.keras.estimator.model_to_estimator`. +[Eager execution](/programmers_guide/eager) is an imperative programming +environment that evaluates operations immediately. This is not required for +Keras, but is supported by `tf.keras` and useful for inspecting your program and +debugging. -In terms of API differences: `tf.keras` is a full implementation of the -Keras API, so any code targeting the Keras API will run on `tf.keras`. -However, keep in mind that: +All of the `tf.keras` model-building APIs are compatible with eager execution. +And while the `Sequential` and functional APIs can be used, eager execution +especially benefits *model subclassing* and building *custom layers*—the APIs +that require you to write the forward pass as code (instead of the APIs that +create models by assembling existing layers). -- The `tf.keras` API version in the latest TensorFlow release might not be the - same as the latest `keras` version from PyPI. - Check out `tf.keras.__version__` if in doubt. -- In `tf.keras`, the default file format saved by `model.save_weights` is the - TensorFlow `SavedModel` format. - To use HDF5, you can pass the `format='h5'` argument. +See the [eager execution guide](/programmers_guide/eager#build_a_model) for +examples of using Keras models with custom training loops and `tf.GradientTape`. -### What is the relationship between tf.keras and tf.estimator? +## Distribution -The [`tf.estimator` API](https://www.tensorflow.org/programmers_guide/estimators) -is a high-level TensorFlow API for training "estimator" models, -in particular in distributed settings. 
-This API targets industry use cases, such as distributed training -on large datasets with a focus on eventually exporting a production model. +### Estimators -If you have a `tf.keras` model that would like to train with the `tf.estimator` -API, you can convert your model to an `Estimator` object via the -`model_to_estimator` utility](https://www.tensorflow.org/programmers_guide/estimators#creating_estimators_from_keras_models): +The [Estimators](/programmers_guide/estimators) API is used for training models +for distributed environments. This targets industry use cases such as +distributed training on large datasets that can export a model for production. +A `tf.keras.Model` can be trained with the `tf.estimator` API by converting the +model to a `tf.estimator.Estimator` object with +`tf.keras.estimator.model_to_estimator`. See +[Creating Estimators from Keras models](/programmers_guide/estimators#creating_estimators_from_keras_models). ```python -estimator = tf.keras.estimator.model_to_estimator(model) -``` +model = keras.Sequential([layers.Dense(10,activation='softmax'), + layers.Dense(10,activation='softmax')]) -When using `model_to_estimator`, enabling eager execution is helpful for -developing and debugging your `input_fn` -(as it allows you to easily print your data). +model.compile(optimizer=tf.train.RMSPropOptimizer(0.001), + loss='categorical_crossentropy', + metrics=['accuracy']) + +estimator = keras.estimator.model_to_estimator(model) +``` +Note: Enable [eager execution](/programmers_guide/eager) for debugging +[Estimator input functions](/programmers_guide/premade_estimators#create_input_functions) +and inspecting data. -### How can I run tf.keras models on multiple GPUs? +### Multiple GPUs -You can run tf.keras models on multiple GPUs using the -[`DistributionStrategy API`](https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/distribute/DistributionStrategy).
-The `DistributionStrategy` API allow you to distribute training on multiple GPUs -with almost no changes to your existing code. +`tf.keras` models can run on multiple GPUs using +`tf.contrib.distribute.DistributionStrategy`. This API provides distributed +training on multiple GPUs with almost no changes to existing code. -Currently [`MirroredStrategy`](https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/distribute/MirroredStrategy) -is the only supported strategy. -`MirroredStrategy` allows you to do in-graph replication with synchronous -training using all-reduce on a single machine. -To use `DistributionStrategy` with a `tf.keras` model, -you can use the `model_to_estimator` utility to convert a `tf.keras` model to -an `Estimator` and then train the estimator. +Currently, `tf.contrib.distribute.MirroredStrategy` is the only supported +distribution strategy. `MirroredStrategy` does in-graph replication with +synchronous training using all-reduce on a single machine. To use +`DistributionStrategy` with Keras, convert the `tf.keras.Model` to a +`tf.estimator.Estimator` with `tf.keras.estimator.model_to_estimator`, then +train the estimator. -Here is a simple example of distributing a `tf.keras` model across multiple GPUs -on a single machine. +The following example distributes a `tf.keras.Model` across multiple GPUs on a +single machine.
-Let's first define a simple model: +First, define a simple model: ```python -model = tf.keras.Sequential() -model.add(tf.keras.layers.Dense(16, activation='relu', input_shape=(10,))) -model.add(tf.keras.layers.Dense(1, activation='sigmoid')) +model = keras.Sequential() +model.add(keras.layers.Dense(16, activation='relu', input_shape=(10,))) +model.add(keras.layers.Dense(1, activation='sigmoid')) + optimizer = tf.train.GradientDescentOptimizer(0.2) + model.compile(loss='binary_crossentropy', optimizer=optimizer) model.summary() ``` -Let's use `model_to_estimator` to create an `Estimator` instance from the -`tf.keras` model defined above. +Convert the Keras model to a `tf.estimator.Estimator` instance: ```python -keras_estimator = tf.keras.estimator.model_to_estimator( - keras_model=model, - config=config, - model_dir='/tmp/model_dir') +keras_estimator = keras.estimator.model_to_estimator( + keras_model=model, + config=config, + model_dir='/tmp/model_dir') ``` -We'll use `tf.data.Datasets` to define our input pipeline. -Our `input_fn` returns a `tf.data.Dataset` object that we then use to distribute -the data across multiple devices with each device processing +Define an *input pipeline*. The `input_fn` returns a `tf.data.Dataset` object +used to distribute the data across multiple devices—with each device processing a slice of the input batch. ```python def input_fn(): - x = np.random.random((1024, 10)) - y = np.random.randint(2, size=(1024, 1)) - x = tf.cast(x, tf.float32) - dataset = tf.data.Dataset.from_tensor_slices((x, y)) - dataset = dataset.repeat(10) - dataset = dataset.batch(32) - return dataset + x = np.random.random((1024, 10)) + y = np.random.randint(2, size=(1024, 1)) + x = tf.cast(x, tf.float32) + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + dataset = dataset.repeat(10) + dataset = dataset.batch(32) + return dataset ``` -The next step is to create a `RunConfig` and set the train_distribute argument -to the new `MirroredStrategy` instance. 
-You can specify a list of devices or the `num_gpus` argument when creating -a `MirroredStrategy` instance. -Not specifying any arguments defaults to using all the available GPUs like we do -in this example. +Next, create a `tf.estimator.RunConfig` and set the `train_distribute` argument +to the `tf.contrib.distribute.MirroredStrategy` instance. When creating +`MirroredStrategy`, you can specify a list of devices or set the `num_gpus` +argument. The default uses all available GPUs, like the following: ```python strategy = tf.contrib.distribute.MirroredStrategy() config = tf.estimator.RunConfig(train_distribute=strategy) ``` -Call train on the `Estimator` instance providing the `input_fn` and `steps` -arguments as input: +Finally, train the `Estimator` instance by providing the `input_fn` and `steps` +arguments: ```python keras_estimator.train(input_fn=input_fn, steps=10) -- GitLab From 7e859ebc65bf7d77ed89f736c7fd6fede0a93c92 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Mon, 18 Jun 2018 11:07:48 -0700 Subject: [PATCH 0070/2038] Add missing Eager relnotes for TensorFlow 1.9. (#20101) --- RELEASE.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 879ce6e440..510eca5467 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -22,6 +22,8 @@ * (C++) `DatasetBase::MakeIterator()` has been renamed to `DatasetBase::MakeIteratorInternal()`. * (C++) `IteratorBase::Initialize()` method was added to support raising errors during iterator construction. * Eager Execution: + * Added the ability to pause recording operations for gradient computation via `tf.GradientTape.stop_recording`. + * Updated documentation, introductory notebooks. * `tf.keras`: * Move Keras code out of _impl folder and remove API files. * `tf.keras.Model.save_weights` now saves in TensorFlow format by default. 
-- GitLab From 9e40df6773e1d29d78bfebed7c97e830fc92bd69 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 18 Jun 2018 16:40:54 -0700 Subject: [PATCH 0071/2038] Update api_def_Exp.pbtxt --- tensorflow/core/api_def/base_api/api_def_Exp.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt b/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt index 01ac3d433a..dd1e3d5dfc 100644 --- a/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt @@ -1,4 +1,4 @@ op { graph_op_name: "Exp" - summary: "Computes exponential of x element-wise. \\(y = e^x\\)." + summary: "Computes exponential of x element-wise. \\\\(y = e^x\\\\)." } -- GitLab From 2b654c6e9b5c914b3bd0be304ab51c5fc39b4762 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 18 Jun 2018 16:41:27 -0700 Subject: [PATCH 0072/2038] Update api_def_GatherNd.pbtxt --- tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt index 342a1f6b05..c156e8854c 100644 --- a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt @@ -25,7 +25,7 @@ END (K-1)-dimensional tensor of indices into `params`, where each element defines a slice of `params`: - output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]] + output[\\\\(i_0, ..., i_{K-2}\\\\)] = params[indices[\\\\(i_0, ..., i_{K-2}\\\\)]] Whereas in @{tf.gather} `indices` defines slices into the first dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the -- GitLab From 5004bf9e463d1da909a0c62b6fc2f51bb00ed0eb Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 18 Jun 2018 16:41:49 -0700 Subject: [PATCH 0073/2038] Update api_def_MatrixExponential.pbtxt --- 
.../core/api_def/base_api/api_def_MatrixExponential.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt index d7b56aec87..cfd8a6d391 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt @@ -18,7 +18,7 @@ END } summary: "Computes the matrix exponential of one or more square matrices:" description: < Date: Mon, 18 Jun 2018 16:42:08 -0700 Subject: [PATCH 0074/2038] Update api_def_MatrixLogarithm.pbtxt --- tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt index 9e80064d15..05bf8ce76a 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt @@ -20,7 +20,7 @@ END summary: "Computes the matrix logarithm of one or more square matrices:" description: < Date: Mon, 18 Jun 2018 17:37:27 -0700 Subject: [PATCH 0075/2038] Update api_def_GatherNd.pbtxt --- tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt index c156e8854c..342a1f6b05 100644 --- a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt @@ -25,7 +25,7 @@ END (K-1)-dimensional tensor of indices into `params`, where each element defines a slice of `params`: - output[\\\\(i_0, ..., i_{K-2}\\\\)] = params[indices[\\\\(i_0, ..., i_{K-2}\\\\)]] + output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]] Whereas in @{tf.gather} 
`indices` defines slices into the first dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the -- GitLab From 994a04808f71d19984420b70ce31fe66f06b174f Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 18 Jun 2018 17:37:42 -0700 Subject: [PATCH 0076/2038] Update api_def_MatrixExponential.pbtxt --- .../core/api_def/base_api/api_def_MatrixExponential.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt index cfd8a6d391..d7b56aec87 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt @@ -18,7 +18,7 @@ END } summary: "Computes the matrix exponential of one or more square matrices:" description: < Date: Mon, 18 Jun 2018 17:38:01 -0700 Subject: [PATCH 0077/2038] Update api_def_MatrixLogarithm.pbtxt --- tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt index 05bf8ce76a..9e80064d15 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt @@ -20,7 +20,7 @@ END summary: "Computes the matrix logarithm of one or more square matrices:" description: < Date: Mon, 18 Jun 2018 17:38:27 -0700 Subject: [PATCH 0078/2038] Update api_def_Polygamma.pbtxt --- tensorflow/core/api_def/base_api/api_def_Polygamma.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_Polygamma.pbtxt b/tensorflow/core/api_def/base_api/api_def_Polygamma.pbtxt index 9196f5dd19..10bf370f54 100644 --- a/tensorflow/core/api_def/base_api/api_def_Polygamma.pbtxt +++ 
b/tensorflow/core/api_def/base_api/api_def_Polygamma.pbtxt @@ -1,6 +1,6 @@ op { graph_op_name: "Polygamma" - summary: "Compute the polygamma function \\(\psi^{(n)}(x)\\)." + summary: "Compute the polygamma function \\\\(\\psi^{(n)}(x)\\\\)." description: < Date: Mon, 18 Jun 2018 17:39:39 -0700 Subject: [PATCH 0079/2038] Update api_def_Zeta.pbtxt --- tensorflow/core/api_def/base_api/api_def_Zeta.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_Zeta.pbtxt b/tensorflow/core/api_def/base_api/api_def_Zeta.pbtxt index a87bdaed90..c02860a16a 100644 --- a/tensorflow/core/api_def/base_api/api_def_Zeta.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Zeta.pbtxt @@ -1,6 +1,6 @@ op { graph_op_name: "Zeta" - summary: "Compute the Hurwitz zeta function \\(\zeta(x, q)\\)." + summary: "Compute the Hurwitz zeta function \\\\(\\zeta(x, q)\\\\)." description: < Date: Mon, 18 Jun 2018 20:11:26 -0700 Subject: [PATCH 0080/2038] eager env_ and env_rendezvous_mgr valid --- tensorflow/core/distributed_runtime/eager/eager_service_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc index 94f39095e1..ca4ed02d85 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc @@ -81,7 +81,7 @@ Status GetNumRetvals(tensorflow::EagerContext* context, const string& op_name, Status EagerServiceImpl::CreateContext(const CreateContextRequest* request, CreateContextResponse* response) { if (env_ == nullptr || env_->rendezvous_mgr == nullptr) { - return errors::InvalidArgument("invalid eager env_ or env_->rendezvous_mgr."); + return errors::InvalidArgument("invalid eager env_ or env_->rendezvous_mgr."); } tensorflow::RemoteRendezvous* r = env_->rendezvous_mgr->Find(0); std::vector devices; -- GitLab From 
4647f36e5a61c540c760f0952be5952c5f69118d Mon Sep 17 00:00:00 2001 From: "Li, Yiqiang" Date: Tue, 19 Jun 2018 12:32:23 +0800 Subject: [PATCH 0081/2038] [INTEL_MKL] Fix reorder creation failure in MklConcat op. --- tensorflow/core/kernels/mkl_concat_op.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index 31d1b949ef..d054f0d404 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -704,14 +704,14 @@ class MklConcatOp : public OpKernel { if (input_tensors[k].NumElements() == 0) continue; - auto src_dims = TFShapeToMklDnnDims( - mkl_input_shapes[k].GetTfShape()); auto src_md = mkl_input_shapes[k].GetMklLayout(); srcs[k].SetUsrMem(src_md, &input_tensors[k]); - if (src_md.data.format != mkl_common_format) + if (src_md.data.format != mkl_common_format) { + memory::dims src_dims(src_md.data.dims, &src_md.data.dims[src_md.data.ndims]); src_md = memory::desc(src_dims, MklDnnType(), mkl_common_format); + } srcs_pd.push_back(memory::primitive_desc(src_md, cpu_engine)); } -- GitLab From df1134694f47b9a924df1bf48f673392ee1d3e15 Mon Sep 17 00:00:00 2001 From: Grzegorz Pawelczak Date: Tue, 19 Jun 2018 09:08:00 +0100 Subject: [PATCH 0082/2038] Add comments explaining why the test is skipped for f16 --- tensorflow/compiler/tests/adam_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/compiler/tests/adam_test.py b/tensorflow/compiler/tests/adam_test.py index ee56a38f94..b904e6676b 100644 --- a/tensorflow/compiler/tests/adam_test.py +++ b/tensorflow/compiler/tests/adam_test.py @@ -52,6 +52,7 @@ class AdamOptimizerTest(XLATestCase): def testBasic(self): for dtype in self.float_types: + # TODO: test fails for float16 due to excessive precision requirements. 
if dtype == np.float16: continue with self.test_session(), self.test_scope(): @@ -93,6 +94,7 @@ class AdamOptimizerTest(XLATestCase): def testTensorLearningRate(self): for dtype in self.float_types: + # TODO: test fails for float16 due to excessive precision requirements. if dtype == np.float16: continue with self.test_session(), self.test_scope(): @@ -134,6 +136,7 @@ class AdamOptimizerTest(XLATestCase): def testSharing(self): for dtype in self.float_types: + # TODO: test fails for float16 due to excessive precision requirements. if dtype == np.float16: continue with self.test_session(), self.test_scope(): -- GitLab From 39b756322b96d4296c5f077991768534077d381e Mon Sep 17 00:00:00 2001 From: Grzegorz Pawelczak Date: Tue, 19 Jun 2018 09:14:43 +0100 Subject: [PATCH 0083/2038] Use the max value of the type as upper bound to minimise chances of repeated numbers --- tensorflow/compiler/tests/random_ops_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tests/random_ops_test.py b/tensorflow/compiler/tests/random_ops_test.py index df458ba8b9..9c7b5a361b 100644 --- a/tensorflow/compiler/tests/random_ops_test.py +++ b/tensorflow/compiler/tests/random_ops_test.py @@ -54,7 +54,11 @@ class RandomOpsTest(XLATestCase): def testRandomUniformIsNotConstant(self): def rng(dtype): - return random_ops.random_uniform(shape=[2], dtype=dtype, maxval=100) + if np.issubdtype(dtype, np.float): + maxval=np.finfo(dtype).max + else: + maxval=np.iinfo(dtype).max + return random_ops.random_uniform(shape=[2], dtype=dtype, maxval=maxval) for dtype in self._random_types(): self._testRngIsNotConstant(rng, dtype) -- GitLab From 86a6b0d7efbe5a3fa1f511237b85c926a6aef3a5 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Tue, 19 Jun 2018 17:47:37 -0700 Subject: [PATCH 0084/2038] [GCS] Typo in ConfigureGcsHook. This commit fixes a typo on ConfigureGcsHook that prevented its correct operation. 
--- tensorflow/contrib/cloud/python/ops/gcs_config_ops.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cloud/python/ops/gcs_config_ops.py b/tensorflow/contrib/cloud/python/ops/gcs_config_ops.py index 8c8c5acb31..4f7300fd1f 100644 --- a/tensorflow/contrib/cloud/python/ops/gcs_config_ops.py +++ b/tensorflow/contrib/cloud/python/ops/gcs_config_ops.py @@ -120,13 +120,17 @@ class ConfigureGcsHook(training.SessionRunHook): def begin(self): if self._credentials: self._credentials_placeholder = array_ops.placeholder(dtypes.string) - self._credentials_ops = gen_gcs_config_ops.gcs_configure_credentials( + self._credentials_op = gen_gcs_config_ops.gcs_configure_credentials( self._credentials_placeholder) + else: + self._credentials_op = None if self._block_cache: self._block_cache_op = gen_gcs_config_ops.gcs_configure_block_cache( max_cache_size=self._block_cache.max_bytes, block_size=self._block_cache.block_size, max_staleness=self._block_cache.max_staleness) + else: + self._block_cache_op = None def after_create_session(self, session, coord): del coord -- GitLab From 0a60bfccad8df81a57459e4393913b8fb2cb53c1 Mon Sep 17 00:00:00 2001 From: xxxx001 <352172@gmail.com> Date: Tue, 19 Jun 2018 21:41:14 -0700 Subject: [PATCH 0085/2038] eager env_ or env_RpcRendezvousMgr --- .../eager/eager_service_impl_test.cc | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc index f865ebe1be..ae418287d3 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc @@ -122,9 +122,8 @@ tensorflow::FunctionDef MatMulFunction() { TEST(EagerServiceImplTest, BasicTest) { WorkerEnv worker_env; worker_env.env = Env::Default(); - tensorflow::RpcRendezvousMgr rm(&worker_env); - worker_env.rendezvous_mgr = 
&rm; - + + Status cons_status; TestEagerServiceImpl eager_service_impl(&worker_env); CreateContextRequest request; @@ -132,6 +131,12 @@ TEST(EagerServiceImplTest, BasicTest) { request.mutable_server_def()->set_task_index(0); CreateContextResponse response; + cons_status = eager_service_impl.CreateContext(&request, &response); + EXPECT_EQ(cons_status.error_message(), "invalid eager env_ or env_->rendezvous_mgr."); + + tensorflow::RpcRendezvousMgr rm(&worker_env); + worker_env.rendezvous_mgr = &rm; + TF_ASSERT_OK(eager_service_impl.CreateContext(&request, &response)); uint64 context_id = response.context_id(); -- GitLab From 1cf5178d0ddf32dddc13cdb00783aa0b79974fe8 Mon Sep 17 00:00:00 2001 From: xxxx001 <352172@gmail.com> Date: Wed, 20 Jun 2018 19:50:29 -0700 Subject: [PATCH 0086/2038] modify eager_service_impl.cc and eager_service_impl_test.cc --- .../core/distributed_runtime/eager/eager_service_impl.cc | 3 ++- .../distributed_runtime/eager/eager_service_impl_test.cc | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc index ca4ed02d85..428bd7423f 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl.cc @@ -80,8 +80,9 @@ Status GetNumRetvals(tensorflow::EagerContext* context, const string& op_name, Status EagerServiceImpl::CreateContext(const CreateContextRequest* request, CreateContextResponse* response) { + //make sure env_ , env_->rendezvous_mgr available if (env_ == nullptr || env_->rendezvous_mgr == nullptr) { - return errors::InvalidArgument("invalid eager env_ or env_->rendezvous_mgr."); + return tensorflow::errors::Internal("invalid eager env_ or env_->rendezvous_mgr."); } tensorflow::RemoteRendezvous* r = env_->rendezvous_mgr->Find(0); std::vector devices; diff --git 
a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc index ae418287d3..f65d129673 100644 --- a/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc +++ b/tensorflow/core/distributed_runtime/eager/eager_service_impl_test.cc @@ -132,12 +132,16 @@ TEST(EagerServiceImplTest, BasicTest) { CreateContextResponse response; cons_status = eager_service_impl.CreateContext(&request, &response); - EXPECT_EQ(cons_status.error_message(), "invalid eager env_ or env_->rendezvous_mgr."); + + EXPECT_NE(cons_status,Status::OK()); + EXPECT_NE(cons_status.ok(),Status::OK().ok()); tensorflow::RpcRendezvousMgr rm(&worker_env); worker_env.rendezvous_mgr = &rm; - TF_ASSERT_OK(eager_service_impl.CreateContext(&request, &response)); + cons_status = eager_service_impl.CreateContext(&request, &response); + TF_ASSERT_OK(cons_status); + EXPECT_EQ(cons_status,Status::OK()); uint64 context_id = response.context_id(); -- GitLab From cb255e292b6a4378990d02557a37c89d751edb8a Mon Sep 17 00:00:00 2001 From: Christian Ertler Date: Thu, 21 Jun 2018 16:36:49 +0200 Subject: [PATCH 0087/2038] Adding tf.image.non_max_suppression_overlaps This commit introduces tf.image.non_max_suppression_overlaps. It allows performing non-max-suppression with an overlap criterion other than IOU by providing an n-by-n matrix with precomputed overlap values for each box pair.
--- ...pi_def_NonMaxSuppressionWithOverlaps.pbtxt | 62 +++++ ...pi_def_NonMaxSuppressionWithOverlaps.pbtxt | 4 + .../core/kernels/non_max_suppression_op.cc | 181 +++++++++++--- .../kernels/non_max_suppression_op_test.cc | 229 ++++++++++++++++++ tensorflow/core/ops/image_ops.cc | 32 +++ tensorflow/python/ops/image_ops_impl.py | 42 ++++ 6 files changed, 515 insertions(+), 35 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_NonMaxSuppressionWithOverlaps.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_NonMaxSuppressionWithOverlaps.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_NonMaxSuppressionWithOverlaps.pbtxt b/tensorflow/core/api_def/base_api/api_def_NonMaxSuppressionWithOverlaps.pbtxt new file mode 100644 index 0000000000..180edb15a4 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_NonMaxSuppressionWithOverlaps.pbtxt @@ -0,0 +1,62 @@ +op { + graph_op_name: "NonMaxSuppressionWithOverlaps" + in_arg { + name: "overlaps" + description: <