From 67dee0adc09534483ce2627ffee629feb5133ae7 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Fri, 6 Apr 2018 03:26:26 +0800 Subject: [PATCH 0001/1310] Fix math equation rendering format in api definitions --- tensorflow/core/api_def/base_api/api_def_Exp.pbtxt | 2 +- .../core/api_def/base_api/api_def_GatherNd.pbtxt | 2 +- .../api_def/base_api/api_def_MatrixExponential.pbtxt | 2 +- .../api_def/base_api/api_def_MatrixLogarithm.pbtxt | 2 +- .../core/api_def/base_api/api_def_Polygamma.pbtxt | 2 +- .../core/api_def/base_api/api_def_ReduceJoin.pbtxt | 2 +- .../core/api_def/base_api/api_def_ScatterNdAdd.pbtxt | 4 ++-- .../base_api/api_def_ScatterNdNonAliasingAdd.pbtxt | 4 ++-- .../core/api_def/base_api/api_def_ScatterNdSub.pbtxt | 4 ++-- .../api_def/base_api/api_def_ScatterNdUpdate.pbtxt | 4 ++-- tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt | 2 +- .../api_def/base_api/api_def_SparseApplyAdagrad.pbtxt | 4 ++-- .../base_api/api_def_SparseApplyCenteredRMSProp.pbtxt | 6 +++--- .../api_def/base_api/api_def_SparseApplyFtrl.pbtxt | 10 +++++----- .../api_def/base_api/api_def_SparseApplyMomentum.pbtxt | 4 ++-- .../base_api/api_def_SparseApplyProximalAdagrad.pbtxt | 8 ++++---- .../api_def_SparseApplyProximalGradientDescent.pbtxt | 4 ++-- .../api_def/base_api/api_def_SparseApplyRMSProp.pbtxt | 6 +++--- .../api_def/base_api/api_def_UnsortedSegmentSum.pbtxt | 2 +- tensorflow/core/api_def/base_api/api_def_Zeta.pbtxt | 2 +- 20 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt b/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt index dd1e3d5dfc..01ac3d433a 100644 --- a/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Exp.pbtxt @@ -1,4 +1,4 @@ op { graph_op_name: "Exp" - summary: "Computes exponential of x element-wise. \\\\(y = e^x\\\\)." + summary: "Computes exponential of x element-wise. \\(y = e^x\\)." 
} diff --git a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt index 6cd76ff340..342a1f6b05 100644 --- a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt @@ -25,7 +25,7 @@ END (K-1)-dimensional tensor of indices into `params`, where each element defines a slice of `params`: - output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]] + output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]] Whereas in @{tf.gather} `indices` defines slices into the first dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt index 0d680f6531..d7b56aec87 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt @@ -18,7 +18,7 @@ END } summary: "Computes the matrix exponential of one or more square matrices:" description: < l1 else 0.0 -accum = accum_new +$$accum_{new} = accum + grad * grad$$ +$$linear += grad + (accum_{new}^{-lr_{power}} - accum^{-lr_{power}}) / lr * var$$ +$$quadratic = 1.0 / (accum_{new}^{lr_{power}} * lr) + 2 * l2$$ +$$var = (sign(linear) * l1 - linear) / quadratic\ if\ |linear| > l1\ else\ 0.0$$ +$$accum = accum_{new}$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt index 8d9ac9ea3f..17dbb488de 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt @@ -64,7 +64,7 @@ Set use_nesterov = True if you want to use Nesterov momentum.
That is for rows we have grad for, we update var and accum as follows: -accum = accum * momentum + grad -var -= lr * accum +$$accum = accum * momentum + grad$$ +$$var -= lr * accum$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt index 80541b91c7..0b24f2ddd1 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt @@ -58,9 +58,9 @@ END summary: "Sparse update entries in \'*var\' and \'*accum\' according to FOBOS algorithm." description: < Date: Tue, 10 Apr 2018 21:10:51 +0800 Subject: [PATCH 0002/1310] Remove breaking ``` for math equations --- tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt | 2 -- .../api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt | 4 +--- tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt | 4 +--- .../core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt | 4 +--- .../core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt | 2 +- 5 files changed, 4 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt index ee0578c2ec..a9a7646314 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt @@ -50,9 +50,7 @@ dimension of `ref`. `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` $$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$ -``` For example, say we want to add 4 scattered elements to a rank-1 tensor to 8 elements. 
In Python, that addition would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt index 1e4f99006a..35116e5f6a 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt @@ -37,7 +37,7 @@ respect to both `input` and `updates`. `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. `indices` must be integer tensor, containing indices into `input`. -It must be shape `\\([d_0, ..., d_{Q-2}, K]\\)` where `0 < K <= P`. +It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. The innermost dimension of `indices` (with length `K`) corresponds to indices into elements (if `K = P`) or `(P-K)`-dimensional slices @@ -45,9 +45,7 @@ indices into elements (if `K = P`) or `(P-K)`-dimensional slices `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` $$[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].$$ -``` For example, say we want to add 4 scattered elements to a rank-1 tensor to 8 elements. In Python, that addition would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt index e8fdd71785..99e5c4908b 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt @@ -42,7 +42,7 @@ within a given variable according to `indices`. `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. `indices` must be integer tensor, containing indices into `ref`. -It must be shape `\\([d_0, ..., d_{Q-2}, K]\\)` where `0 < K <= P`. +It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. 
The innermost dimension of `indices` (with length `K`) corresponds to indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th @@ -50,9 +50,7 @@ dimension of `ref`. `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` $$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$ -``` For example, say we want to subtract 4 scattered elements from a rank-1 tensor with 8 elements. In Python, that subtraction would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt index 556a5d559b..cb57c171b9 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt @@ -42,7 +42,7 @@ variable according to `indices`. `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. `indices` must be integer tensor, containing indices into `ref`. -It must be shape `\\([d_0, ..., d_{Q-2}, K]\\)` where `0 < K <= P`. +It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. The innermost dimension of `indices` (with length `K`) corresponds to indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th @@ -50,9 +50,7 @@ dimension of `ref`. `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` $$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$ -``` For example, say we want to update 4 scattered elements to a rank-1 tensor to 8 elements. In Python, that update would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt index ac1499346c..9aeabd030d 100644 --- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt @@ -20,7 +20,7 @@ Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of segments. 
Computes a tensor such that -`\\(output[i] = sum_{j...} data[j...]\\)` where the sum is over tuples `j...` such +\\(output[i] = sum_{j...} data[j...]\\) where the sum is over tuples `j...` such that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` need not be sorted and need not cover all values in the full range of valid values. -- GitLab From de6200e7f58b616d6169cc35946e85323da66053 Mon Sep 17 00:00:00 2001 From: eqy Date: Sun, 15 Apr 2018 23:52:04 -0700 Subject: [PATCH 0003/1310] fix command line example package path --- tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md index 495014c6fc..f8327daa08 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md +++ b/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md @@ -41,7 +41,7 @@ FlatBuffer to perform floating-point inference. ``` bazel run --config=opt \ - third_party/tensorflow/contrib/lite/toco:toco -- \ + //tensorflow/contrib/lite/toco:toco -- \ --savedmodel_directory=/tmp/saved_model \ --output_file=/tmp/foo.tflite ``` -- GitLab From 8dc3b3c453180211f4be5302f957664004e1ec04 Mon Sep 17 00:00:00 2001 From: apantykhin Date: Mon, 16 Apr 2018 20:40:51 +0400 Subject: [PATCH 0004/1310] add checking for input values in GANHead constructor --- .../gan/python/estimator/python/head_impl.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py index a21358c50b..652ffee30a 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py @@ -25,6 +25,7 @@ from tensorflow.contrib.gan.python import train as tfgan_train from tensorflow.python.estimator import model_fn as model_fn_lib from 
tensorflow.python.estimator.canned import head from tensorflow.python.framework import ops +from tensorflow.python.training import optimizer __all__ = [ 'GANHead', @@ -90,9 +91,24 @@ class GANHead(head._Head): # pylint: disable=protected-access name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. """ + + if not callable(generator_loss_fn): + raise TypeError('generator_loss_fn must be callable.') + if not callable(discriminator_loss_fn): + raise TypeError('discriminator_loss_fn must be callable.') + if not isinstance(generator_optimizer, optimizer.Optimizer): + raise TypeError('generator_optimizer must be Optimizer.') + if not isinstance(discriminator_optimizer, optimizer.Optimizer): + raise TypeError('discriminator_optimizer must be Optimizer.') + if not use_loss_summaries in [True, False, None]: + raise ValueError('use_loss_summaries must be True, False or None.') + if get_hooks_fn is not None and not callable(get_hooks_fn): + raise TypeError('get_hooks_fn must be callable.') + if name is not None and not isinstance(name, str): + raise TypeError('name must be string.') + if get_hooks_fn is None: get_hooks_fn = tfgan_train.get_sequential_train_hooks() - # TODO(joelshor): Validate inputs. 
if use_loss_summaries in [True, False]: generator_loss_fn = functools.partial( -- GitLab From 7e2929f0e429ba6f47365f034317138066dc2adb Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Fri, 20 Apr 2018 12:44:05 -0700 Subject: [PATCH 0005/1310] Roll forward the custom optimizers change --- .../core/grappler/optimizers/meta_optimizer.cc | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 22799311bc..3f8d42b98f 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -156,6 +156,19 @@ Status MetaOptimizer::InitializeOptimizersByName( VLOG(2) << "Can't register an optimizer by name: " << optimizer_name; } } + for (const auto& optimizer_config : cfg_.custom_optimizers()) { + auto custom_optimizer = CustomGraphOptimizerRegistry::CreateByNameOrNull( + optimizer_config.name()); + if (custom_optimizer) { + VLOG(2) << "Registered custom configurable graph optimizer: " + << optimizer_config.name(); + TF_RETURN_IF_ERROR(custom_optimizer->Init(&optimizer_config)); + optimizers->push_back(std::move(custom_optimizer)); + } else { + VLOG(2) << "Can't register an optimizer by name: " + << optimizer_config.name(); + } + } return Status::OK(); } @@ -164,7 +177,8 @@ Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, VLOG(2) << "Optimize GrapplerItem: item.id=" << item.id; std::vector> optimizers; - bool register_by_name = !cfg_.optimizers().empty(); + bool register_by_name = + (!cfg_.optimizers().empty() || !cfg_.custom_optimizers().empty()); TF_RETURN_IF_ERROR(register_by_name ? 
InitializeOptimizersByName(&optimizers) : InitializeOptimizers(&optimizers)); @@ -321,7 +335,7 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { cfg.auto_parallel().enable() || cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT || cfg.debug_stripper() == RewriterConfig::ON || - !cfg.optimizers().empty(); + !cfg.optimizers().empty() || !cfg.custom_optimizers().empty(); } Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, -- GitLab From 6583c9a693b122a49f17e7ec99463c6c3b7dbe98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 24 Apr 2018 20:22:24 +0800 Subject: [PATCH 0006/1310] CLN: move _safe_embedding_lookup_sparse to embedding_ops and make it public --- tensorflow/python/BUILD | 2 + .../python/feature_column/feature_column.py | 161 +----------------- tensorflow/python/ops/embedding_ops.py | 157 +++++++++++++++++ tensorflow/python/ops/nn.py | 1 + 4 files changed, 163 insertions(+), 158 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index bb32f4bbe0..6e2e546984 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1845,6 +1845,8 @@ py_library( ":math_ops", ":platform", ":resource_variable_ops", + ":sparse_ops", + ":tensor_shape", ":variables", ], ) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index c16c3cda48..f48634d0c7 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -2058,7 +2058,7 @@ def _create_categorical_column_weighted_sum(column, initializer=init_ops.zeros_initializer(), trainable=trainable, collections=weight_collections) - return _safe_embedding_lookup_sparse( + return embedding_ops.safe_embedding_lookup_sparse( weight, id_tensor, sparse_weights=weight_tensor, @@ -2479,7 +2479,7 @@ class _EmbeddingColumn( }) # Return embedding lookup result. 
- return _safe_embedding_lookup_sparse( + return embedding_ops.safe_embedding_lookup_sparse( embedding_weights=embedding_weights, sparse_ids=sparse_ids, sparse_weights=sparse_weights, @@ -2612,7 +2612,7 @@ class _SharedEmbeddingColumn( }) # Return embedding lookup result. - return _safe_embedding_lookup_sparse( + return embedding_ops.safe_embedding_lookup_sparse( embedding_weights=embedding_weights, sparse_ids=sparse_ids, sparse_weights=sparse_weights, @@ -3065,161 +3065,6 @@ def _collect_leaf_level_keys(cross): return leaf_level_keys -# TODO(zakaria): Move this to embedding_ops and make it public. -def _safe_embedding_lookup_sparse(embedding_weights, - sparse_ids, - sparse_weights=None, - combiner='mean', - default_id=None, - name=None, - partition_strategy='div', - max_norm=None): - """Lookup embedding results, accounting for invalid IDs and empty features. - - The partitioned embedding in `embedding_weights` must all be the same shape - except for the first dimension. The first dimension is allowed to vary as the - vocabulary size is not necessarily a multiple of `P`. `embedding_weights` - may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a - partitioner. - - Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs - with non-positive weight. For an entry with no features, the embedding vector - for `default_id` is returned, or the 0-vector if `default_id` is not supplied. - - The ids and weights may be multi-dimensional. Embeddings are always aggregated - along the last dimension. - - Args: - embedding_weights: A list of `P` float `Tensor`s or values representing - partitioned embedding `Tensor`s. Alternatively, a `PartitionedVariable` - created by partitioning along dimension 0. The total unpartitioned - shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the - vocab size and `e_1, ..., e_m` are the embedding dimensions. - sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the - ids. 
`d_0` is typically batch size. - sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing - float weights corresponding to `sparse_ids`, or `None` if all weights - are be assumed to be 1.0. - combiner: A string specifying how to combine embedding results for each - entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" - the default. - default_id: The id to use for an entry with no features. - name: A name for this operation (optional). - partition_strategy: A string specifying the partitioning strategy. - Currently `"div"` and `"mod"` are supported. Default is `"div"`. - max_norm: If not `None`, all embeddings are l2-normalized to max_norm before - combining. - - - Returns: - Dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`. - - Raises: - ValueError: if `embedding_weights` is empty. - """ - if embedding_weights is None: - raise ValueError('Missing embedding_weights %s.' % embedding_weights) - if isinstance(embedding_weights, variables.PartitionedVariable): - embedding_weights = list(embedding_weights) # get underlying Variables. - if not isinstance(embedding_weights, list): - embedding_weights = [embedding_weights] - if len(embedding_weights) < 1: - raise ValueError('Missing embedding_weights %s.' % embedding_weights) - - dtype = sparse_weights.dtype if sparse_weights is not None else None - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] - - with ops.name_scope(name, 'embedding_lookup', - embedding_weights + [sparse_ids, - sparse_weights]) as scope: - # Reshape higher-rank sparse ids and weights to linear segment ids. 
- original_shape = sparse_ids.dense_shape - original_rank_dim = sparse_ids.dense_shape.get_shape()[0] - original_rank = ( - array_ops.size(original_shape) - if original_rank_dim.value is None - else original_rank_dim.value) - sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [ - math_ops.reduce_prod( - array_ops.slice(original_shape, [0], [original_rank - 1])), - array_ops.gather(original_shape, original_rank - 1)]) - if sparse_weights is not None: - sparse_weights = sparse_tensor_lib.SparseTensor( - sparse_ids.indices, - sparse_weights.values, sparse_ids.dense_shape) - - # Prune invalid ids and weights. - sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) - if combiner != 'sum': - sparse_ids, sparse_weights = _prune_invalid_weights( - sparse_ids, sparse_weights) - - # Fill in dummy values for empty features, if necessary. - sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids, - default_id or - 0) - if sparse_weights is not None: - sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0) - - result = embedding_ops.embedding_lookup_sparse( - embedding_weights, - sparse_ids, - sparse_weights, - combiner=combiner, - partition_strategy=partition_strategy, - name=None if default_id is None else scope, - max_norm=max_norm) - - if default_id is None: - # Broadcast is_row_empty to the same shape as embedding_lookup_result, - # for use in Select. - is_row_empty = array_ops.tile( - array_ops.reshape(is_row_empty, [-1, 1]), - array_ops.stack([1, array_ops.shape(result)[1]])) - - result = array_ops.where(is_row_empty, - array_ops.zeros_like(result), - result, - name=scope) - - # Reshape back from linear ids back into higher-dimensional dense result. 
- final_result = array_ops.reshape( - result, - array_ops.concat([ - array_ops.slice( - math_ops.cast(original_shape, dtypes.int32), [0], - [original_rank - 1]), - array_ops.slice(array_ops.shape(result), [1], [-1]) - ], 0)) - final_result.set_shape(tensor_shape.unknown_shape( - (original_rank_dim - 1).value).concatenate(result.get_shape()[1:])) - return final_result - - -def _prune_invalid_ids(sparse_ids, sparse_weights): - """Prune invalid IDs (< 0) from the input ids and weights.""" - is_id_valid = math_ops.greater_equal(sparse_ids.values, 0) - if sparse_weights is not None: - is_id_valid = math_ops.logical_and( - is_id_valid, - array_ops.ones_like(sparse_weights.values, dtype=dtypes.bool)) - sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid) - if sparse_weights is not None: - sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid) - return sparse_ids, sparse_weights - - -def _prune_invalid_weights(sparse_ids, sparse_weights): - """Prune invalid weights (< 0) from the input ids and weights.""" - if sparse_weights is not None: - is_weights_valid = math_ops.greater(sparse_weights.values, 0) - sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_weights_valid) - sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_weights_valid) - return sparse_ids, sparse_weights - - class _IndicatorColumn(_DenseColumn, _SequenceDenseColumn, collections.namedtuple('_IndicatorColumn', ['categorical_column'])): diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 9e46739bc1..a8cfeca119 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -23,6 +23,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from 
tensorflow.python.ops import clip_ops # Imports gradient definitions. @@ -30,6 +31,7 @@ from tensorflow.python.ops import data_flow_grad # pylint: disable=unused-impor from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import tf_export @@ -480,3 +482,158 @@ def embedding_lookup_sparse(params, assert False, "Unrecognized combiner" return embeddings + + +@tf_export("nn.safe_embedding_lookup_sparse") +def safe_embedding_lookup_sparse(embedding_weights, + sparse_ids, + sparse_weights=None, + combiner='mean', + default_id=None, + name=None, + partition_strategy='div', + max_norm=None): + """Lookup embedding results, accounting for invalid IDs and empty features. + + The partitioned embedding in `embedding_weights` must all be the same shape + except for the first dimension. The first dimension is allowed to vary as the + vocabulary size is not necessarily a multiple of `P`. `embedding_weights` + may be a `PartitionedVariable` as returned by using `tf.get_variable()` with a + partitioner. + + Invalid IDs (< 0) are pruned from input IDs and weights, as well as any IDs + with non-positive weight. For an entry with no features, the embedding vector + for `default_id` is returned, or the 0-vector if `default_id` is not supplied. + + The ids and weights may be multi-dimensional. Embeddings are always aggregated + along the last dimension. + + Args: + embedding_weights: A list of `P` float `Tensor`s or values representing + partitioned embedding `Tensor`s. Alternatively, a `PartitionedVariable` + created by partitioning along dimension 0. The total unpartitioned + shape should be `[e_0, e_1, ..., e_m]`, where `e_0` represents the + vocab size and `e_1, ..., e_m` are the embedding dimensions. 
+ sparse_ids: `SparseTensor` of shape `[d_0, d_1, ..., d_n]` containing the + ids. `d_0` is typically batch size. + sparse_weights: `SparseTensor` of same shape as `sparse_ids`, containing + float weights corresponding to `sparse_ids`, or `None` if all weights + are assumed to be 1.0. + combiner: A string specifying how to combine embedding results for each + entry. Currently "mean", "sqrtn" and "sum" are supported, with "mean" + the default. + default_id: The id to use for an entry with no features. + name: A name for this operation (optional). + partition_strategy: A string specifying the partitioning strategy. + Currently `"div"` and `"mod"` are supported. Default is `"div"`. + max_norm: If not `None`, all embeddings are l2-normalized to max_norm before + combining. + + + Returns: + Dense `Tensor` of shape `[d_0, d_1, ..., d_{n-1}, e_1, ..., e_m]`. + + Raises: + ValueError: if `embedding_weights` is empty. + """ + if embedding_weights is None: + raise ValueError('Missing embedding_weights %s.' % embedding_weights) + if isinstance(embedding_weights, variables.PartitionedVariable): + embedding_weights = list(embedding_weights) # get underlying Variables. + if not isinstance(embedding_weights, list): + embedding_weights = [embedding_weights] + if len(embedding_weights) < 1: + raise ValueError('Missing embedding_weights %s.' % embedding_weights) + + dtype = sparse_weights.dtype if sparse_weights is not None else None + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] + + with ops.name_scope(name, 'embedding_lookup', + embedding_weights + [sparse_ids, + sparse_weights]) as scope: + # Reshape higher-rank sparse ids and weights to linear segment ids.
+ original_shape = sparse_ids.dense_shape + original_rank_dim = sparse_ids.dense_shape.get_shape()[0] + original_rank = ( + array_ops.size(original_shape) + if original_rank_dim.value is None + else original_rank_dim.value) + sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [ + math_ops.reduce_prod( + array_ops.slice(original_shape, [0], [original_rank - 1])), + array_ops.gather(original_shape, original_rank - 1)]) + if sparse_weights is not None: + sparse_weights = sparse_tensor.SparseTensor( + sparse_ids.indices, + sparse_weights.values, sparse_ids.dense_shape) + + # Prune invalid ids and weights. + sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) + if combiner != 'sum': + sparse_ids, sparse_weights = _prune_invalid_weights( + sparse_ids, sparse_weights) + + # Fill in dummy values for empty features, if necessary. + sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids, + default_id or + 0) + if sparse_weights is not None: + sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0) + + result = embedding_lookup_sparse( + embedding_weights, + sparse_ids, + sparse_weights, + combiner=combiner, + partition_strategy=partition_strategy, + name=None if default_id is None else scope, + max_norm=max_norm) + + if default_id is None: + # Broadcast is_row_empty to the same shape as embedding_lookup_result, + # for use in Select. + is_row_empty = array_ops.tile( + array_ops.reshape(is_row_empty, [-1, 1]), + array_ops.stack([1, array_ops.shape(result)[1]])) + + result = array_ops.where(is_row_empty, + array_ops.zeros_like(result), + result, + name=scope) + + # Reshape back from linear ids back into higher-dimensional dense result. 
+ final_result = array_ops.reshape( + result, + array_ops.concat([ + array_ops.slice( + math_ops.cast(original_shape, dtypes.int32), [0], + [original_rank - 1]), + array_ops.slice(array_ops.shape(result), [1], [-1]) + ], 0)) + final_result.set_shape(tensor_shape.unknown_shape( + (original_rank_dim - 1).value).concatenate(result.get_shape()[1:])) + return final_result + + +def _prune_invalid_ids(sparse_ids, sparse_weights): + """Prune invalid IDs (< 0) from the input ids and weights.""" + is_id_valid = math_ops.greater_equal(sparse_ids.values, 0) + if sparse_weights is not None: + is_id_valid = math_ops.logical_and( + is_id_valid, + array_ops.ones_like(sparse_weights.values, dtype=dtypes.bool)) + sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid) + if sparse_weights is not None: + sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid) + return sparse_ids, sparse_weights + + +def _prune_invalid_weights(sparse_ids, sparse_weights): + """Prune invalid weights (<= 0) from the input ids and weights.""" + if sparse_weights is not None: + is_weights_valid = math_ops.greater(sparse_weights.values, 0) + sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_weights_valid) + sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_weights_valid) + return sparse_ids, sparse_weights diff --git a/tensorflow/python/ops/nn.py b/tensorflow/python/ops/nn.py index 1d0d9a52a1..fb896bf042 100644 --- a/tensorflow/python/ops/nn.py +++ b/tensorflow/python/ops/nn.py @@ -79,6 +79,7 @@ See the @{$python/nn} guide.
@@weighted_cross_entropy_with_logits @@embedding_lookup @@embedding_lookup_sparse +@@safe_embedding_lookup_sparse @@dynamic_rnn @@bidirectional_dynamic_rnn @@raw_rnn -- GitLab From 608508c35a4b87a17b9f07364e6fbeae2fa948c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 24 Apr 2018 20:32:38 +0800 Subject: [PATCH 0007/1310] CLN: move the corresponding test case --- .../python/layers/embedding_ops_test.py | 1 - tensorflow/python/kernel_tests/BUILD | 1 + .../python/kernel_tests/embedding_ops_test.py | 218 ++++++++++++++++++ 3 files changed, 219 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index bf25144982..87f00f94a6 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -21,7 +21,6 @@ from __future__ import print_function import itertools import math -import sys import numpy as np diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index ba8f1fd3ca..2fbdde849b 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2717,6 +2717,7 @@ cuda_py_test( "//tensorflow/python:embedding_ops", "//tensorflow/python:framework", "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:init_ops", "//tensorflow/python:linalg_ops", "//tensorflow/python:math_ops", "//tensorflow/python:partitioned_variables", diff --git a/tensorflow/python/kernel_tests/embedding_ops_test.py b/tensorflow/python/kernel_tests/embedding_ops_test.py index e53ca1dcaa..55d75cb474 100644 --- a/tensorflow/python/kernel_tests/embedding_ops_test.py +++ b/tensorflow/python/kernel_tests/embedding_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import itertools +import math import numpy as np from 
six.moves import xrange # pylint: disable=redefined-builtin @@ -31,6 +32,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import init_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import partitioned_variables @@ -736,6 +738,222 @@ class EmbeddingLookupSparseTest(test.TestCase): x, sp_ids, sp_weights, combiner="mean") +class SafeEmbeddingLookupSparseTest(test.TestCase): + + def _random_weights(self, vocab_size=4, embed_dim=4, num_shards=1): + assert vocab_size > 0 + assert embed_dim > 0 + assert num_shards > 0 + assert num_shards <= vocab_size + + embedding_weights = partitioned_variables.create_partitioned_variables( + shape=[vocab_size, embed_dim], + slicing=[num_shards, 1], + initializer=init_ops.truncated_normal_initializer( + mean=0.0, stddev=1.0 / math.sqrt(vocab_size), dtype=dtypes.float32)) + for w in embedding_weights: + w.initializer.run() + embedding_weights = [w.eval() for w in embedding_weights] + return embedding_weights + + def _ids_and_weights_2d(self): + # Each row demonstrates a test case: + # Row 0: multiple valid ids, 1 invalid id, weighted mean + # Row 1: all ids are invalid (leaving no valid ids after pruning) + # Row 2: no ids to begin with + # Row 3: single id + # Row 4: all ids have <=0 weight + indices = [[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [4, 0], [4, 1]] + ids = [0, 1, -1, -1, 2, 0, 1] + weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] + shape = [5, 4] + + sparse_ids = sparse_tensor.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(ids, dtypes.int64), + constant_op.constant(shape, dtypes.int64)) + + sparse_weights = sparse_tensor.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(weights, dtypes.float32), + 
constant_op.constant(shape, dtypes.int64)) + + return sparse_ids, sparse_weights + + def _ids_and_weights_3d(self): + # Each (2-D) index demonstrates a test case: + # Index 0, 0: multiple valid ids, 1 invalid id, weighted mean + # Index 0, 1: all ids are invalid (leaving no valid ids after pruning) + # Index 0, 2: no ids to begin with + # Index 1, 0: single id + # Index 1, 1: all ids have <=0 weight + # Index 1, 2: no ids to begin with + indices = [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 1, 0], [1, 0, 0], [1, 1, 0], + [1, 1, 1]] + ids = [0, 1, -1, -1, 2, 0, 1] + weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] + shape = [2, 3, 4] + + sparse_ids = sparse_tensor.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(ids, dtypes.int64), + constant_op.constant(shape, dtypes.int64)) + + sparse_weights = sparse_tensor.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(weights, dtypes.float32), + constant_op.constant(shape, dtypes.int64)) + + return sparse_ids, sparse_weights + + def test_safe_embedding_lookup_sparse_return_zero_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / + 3.0, [0] * 4, [0] * 4, embedding_weights[0][2], [0] * 4]) + + def test_safe_embedding_lookup_sparse_return_special_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(1.0 * embedding_weights[0][0] + 2.0 * 
embedding_weights[0][1]) / + 3.0, embedding_weights[0][3], embedding_weights[0][3], + embedding_weights[0][2], embedding_weights[0][3]]) + + def test_safe_embedding_lookup_sparse_no_weights(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, _ = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, + [0] * 4, embedding_weights[0][2], ( + embedding_weights[0][0] + embedding_weights[0][1]) / 2.0]) + + def test_safe_embedding_lookup_sparse_partitioned(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, _ = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + embedding_weights = list(itertools.chain(*embedding_weights)) + self.assertAllClose(embedding_lookup_result, + [(embedding_weights[0] + embedding_weights[1]) / 2.0, + [0] * 4, [0] * 4, embedding_weights[2], + (embedding_weights[0] + embedding_weights[1]) / 2.0]) + + def test_safe_embedding_lookup_sparse_partitioned_inconsistent_weights(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + embedding_weights[1] = embedding_weights[1].astype(np.float64) + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids) + embedding_weights = [ + constant_op.constant(w, dtype=dtypes.float64) + for w in embedding_weights + ] + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids, sparse_weights) + + def test_safe_embedding_lookup_sparse_3d_return_zero_vector(self): + with self.test_session(): + embedding_weights = 
self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights).eval()) + + self.assertAllClose(embedding_lookup_result, [[ + (1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / 3.0, + [0] * 4, [0] * 4 + ], [embedding_weights[0][2], [0] * 4, [0] * 4]]) + + def test_safe_embedding_lookup_sparse_3d_return_special_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) + + self.assertAllClose( + embedding_lookup_result, + [[(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / + 3.0, embedding_weights[0][3], embedding_weights[0][3]], [ + embedding_weights[0][2], embedding_weights[0][3], + embedding_weights[0][3] + ]]) + + def test_safe_embedding_lookup_sparse_3d_no_weights(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, _ = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + self.assertAllClose(embedding_lookup_result, [[( + embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, [ + 0 + ] * 4], [ + embedding_weights[0][2], + (embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4 + ]]) + + def test_safe_embedding_lookup_sparse_3d_partitioned(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, _ = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + embedding_weights = list(itertools.chain(*embedding_weights)) + self.assertAllClose(embedding_lookup_result, 
[[ + (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4, [0] * 4 + ], [ + embedding_weights[2], + (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4 + ]]) + + def test_safe_embedding_lookup_sparse_3d_partitioned_inconsistent_weights( + self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_weights[1] = embedding_weights[1].astype(np.float64) + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids) + embedding_weights = [ + constant_op.constant(w, dtype=dtypes.float64) + for w in embedding_weights + ] + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids, sparse_weights) + + class DynamicStitchOpTest(test.TestCase): def testCint32Cpu(self): -- GitLab From 067c85fb66345e61aee9428cd645cca786ed2bf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 24 Apr 2018 20:52:33 +0800 Subject: [PATCH 0008/1310] CLN: delete duplicate codes --- .../layers/python/layers/embedding_ops.py | 117 ++---------------- .../python/layers/embedding_ops_test.py | 4 +- 2 files changed, 11 insertions(+), 110 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index 49c3faf3b7..4353bf9c28 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -19,14 +19,12 @@ from __future__ import print_function from six.moves import xrange # pylint: disable=redefined-builtin -from tensorflow.contrib.framework.python.framework import tensor_util as contrib_tensor_util from tensorflow.contrib.layers.python.ops import sparse_feature_cross_op from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from 
tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor -from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops @@ -100,112 +98,15 @@ def safe_embedding_lookup_sparse(embedding_weights, logging.warn("The default value of combiner will change from \"mean\" " "to \"sqrtn\" after 2016/11/01.") combiner = "mean" - if embedding_weights is None: - raise ValueError("Missing embedding_weights %s." % embedding_weights) - if isinstance(embedding_weights, variables.PartitionedVariable): - embedding_weights = list(embedding_weights) # get underlying Variables. - if not isinstance(embedding_weights, list): - embedding_weights = [embedding_weights] - if len(embedding_weights) < 1: - raise ValueError("Missing embedding_weights %s." % embedding_weights) - - dtype = sparse_weights.dtype if sparse_weights is not None else None - if isinstance(embedding_weights, variables.PartitionedVariable): - embedding_weights = list(embedding_weights) - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] - - contrib_tensor_util.assert_same_float_dtype(embedding_weights + - [sparse_weights]) - - with ops.name_scope(name, "embedding_lookup", - embedding_weights + [sparse_ids, - sparse_weights]) as scope: - # Reshape higher-rank sparse ids and weights to linear segment ids. 
- original_shape = sparse_ids.dense_shape - original_rank_dim = sparse_ids.dense_shape.get_shape()[0] - original_rank = ( - array_ops.size(original_shape) - if original_rank_dim.value is None - else original_rank_dim.value) - sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [ - math_ops.reduce_prod( - array_ops.slice(original_shape, [0], [original_rank - 1])), - array_ops.gather(original_shape, original_rank - 1)]) - if sparse_weights is not None: - sparse_weights = sparse_tensor.SparseTensor( - sparse_ids.indices, - sparse_weights.values, sparse_ids.dense_shape) - - # Prune invalid ids and weights. - sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) - if combiner != "sum": - sparse_ids, sparse_weights = _prune_invalid_weights( - sparse_ids, sparse_weights) - - # Fill in dummy values for empty features, if necessary. - sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids, - default_id or - 0) - if sparse_weights is not None: - sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0) - - result = embedding_ops.embedding_lookup_sparse( - embedding_weights, - sparse_ids, - sparse_weights, - combiner=combiner, - partition_strategy=partition_strategy, - name=None if default_id is None else scope, - max_norm=max_norm) - - if default_id is None: - # Broadcast is_row_empty to the same shape as embedding_lookup_result, - # for use in Select. - is_row_empty = array_ops.tile( - array_ops.reshape(is_row_empty, [-1, 1]), - array_ops.stack([1, array_ops.shape(result)[1]])) - - result = array_ops.where(is_row_empty, - array_ops.zeros_like(result), - result, - name=scope) - - # Reshape back from linear ids back into higher-dimensional dense result. 
- final_result = array_ops.reshape( - result, - array_ops.concat([ - array_ops.slice( - math_ops.cast(original_shape, dtypes.int32), [0], - [original_rank - 1]), - array_ops.slice(array_ops.shape(result), [1], [-1]) - ], 0)) - final_result.set_shape(tensor_shape.unknown_shape( - (original_rank_dim - 1).value).concatenate(result.get_shape()[1:])) - return final_result - - -def _prune_invalid_ids(sparse_ids, sparse_weights): - """Prune invalid IDs (< 0) from the input ids and weights.""" - is_id_valid = math_ops.greater_equal(sparse_ids.values, 0) - if sparse_weights is not None: - is_id_valid = math_ops.logical_and( - is_id_valid, - array_ops.ones_like(sparse_weights.values, dtype=dtypes.bool)) - sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid) - if sparse_weights is not None: - sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid) - return sparse_ids, sparse_weights - - -def _prune_invalid_weights(sparse_ids, sparse_weights): - """Prune invalid weights (< 0) from the input ids and weights.""" - if sparse_weights is not None: - is_weights_valid = math_ops.greater(sparse_weights.values, 0) - sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_weights_valid) - sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_weights_valid) - return sparse_ids, sparse_weights + return embedding_ops.safe_embedding_lookup_sparse( + embedding_weights=embedding_weights, + sparse_ids=sparse_ids, + sparse_weights=sparse_weights, + combiner=combiner, + default_id=default_id, + name=name, + partition_strategy=partition_strategy, + max_norm=max_norm) def scattered_embedding_lookup(params, diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index 87f00f94a6..4d9849b4b1 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -168,7 +168,7 @@ class 
SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, sparse_weights = self._ids_and_weights_2d() embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, embedding_weights, sparse_ids) embedding_weights = [ constant_op.constant(w, dtype=dtypes.float64) @@ -245,7 +245,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, sparse_weights = self._ids_and_weights_3d() embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, embedding_weights, sparse_ids) embedding_weights = [ constant_op.constant(w, dtype=dtypes.float64) -- GitLab From 91ad552a52242b3d382eee6a3382c79be36b7df7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 24 Apr 2018 20:57:47 +0800 Subject: [PATCH 0009/1310] CLN: delete test cases in contrib --- .../python/layers/embedding_ops_test.py | 217 ------------------ 1 file changed, 217 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index 4d9849b4b1..f7b7ade39d 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -20,7 +20,6 @@ from __future__ import division from __future__ import print_function import itertools -import math import numpy as np @@ -39,222 +38,6 @@ from tensorflow.python.platform import test from tensorflow.python.util import compat -class SafeEmbeddingLookupSparseTest(test.TestCase): - - def _random_weights(self, vocab_size=4, embed_dim=4, num_shards=1): - assert vocab_size > 0 - assert embed_dim > 0 - assert num_shards > 0 - assert num_shards <= vocab_size - - 
embedding_weights = partitioned_variables.create_partitioned_variables( - shape=[vocab_size, embed_dim], - slicing=[num_shards, 1], - initializer=init_ops.truncated_normal_initializer( - mean=0.0, stddev=1.0 / math.sqrt(vocab_size), dtype=dtypes.float32)) - for w in embedding_weights: - w.initializer.run() - embedding_weights = [w.eval() for w in embedding_weights] - return embedding_weights - - def _ids_and_weights_2d(self): - # Each row demonstrates a test case: - # Row 0: multiple valid ids, 1 invalid id, weighted mean - # Row 1: all ids are invalid (leaving no valid ids after pruning) - # Row 2: no ids to begin with - # Row 3: single id - # Row 4: all ids have <=0 weight - indices = [[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [4, 0], [4, 1]] - ids = [0, 1, -1, -1, 2, 0, 1] - weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] - shape = [5, 4] - - sparse_ids = sparse_tensor_lib.SparseTensor( - constant_op.constant(indices, dtypes.int64), - constant_op.constant(ids, dtypes.int64), - constant_op.constant(shape, dtypes.int64)) - - sparse_weights = sparse_tensor_lib.SparseTensor( - constant_op.constant(indices, dtypes.int64), - constant_op.constant(weights, dtypes.float32), - constant_op.constant(shape, dtypes.int64)) - - return sparse_ids, sparse_weights - - def _ids_and_weights_3d(self): - # Each (2-D) index demonstrates a test case: - # Index 0, 0: multiple valid ids, 1 invalid id, weighted mean - # Index 0, 1: all ids are invalid (leaving no valid ids after pruning) - # Index 0, 2: no ids to begin with - # Index 1, 0: single id - # Index 1, 1: all ids have <=0 weight - # Index 1, 2: no ids to begin with - indices = [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 1, 0], [1, 0, 0], [1, 1, 0], - [1, 1, 1]] - ids = [0, 1, -1, -1, 2, 0, 1] - weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] - shape = [2, 3, 4] - - sparse_ids = sparse_tensor_lib.SparseTensor( - constant_op.constant(indices, dtypes.int64), - constant_op.constant(ids, dtypes.int64), - constant_op.constant(shape, 
dtypes.int64)) - - sparse_weights = sparse_tensor_lib.SparseTensor( - constant_op.constant(indices, dtypes.int64), - constant_op.constant(weights, dtypes.float32), - constant_op.constant(shape, dtypes.int64)) - - return sparse_ids, sparse_weights - - def test_safe_embedding_lookup_sparse_return_zero_vector(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, sparse_weights = self._ids_and_weights_2d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, sparse_weights).eval()) - - self.assertAllClose( - embedding_lookup_result, - [(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / - 3.0, [0] * 4, [0] * 4, embedding_weights[0][2], [0] * 4]) - - def test_safe_embedding_lookup_sparse_return_special_vector(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, sparse_weights = self._ids_and_weights_2d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) - - self.assertAllClose( - embedding_lookup_result, - [(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / - 3.0, embedding_weights[0][3], embedding_weights[0][3], - embedding_weights[0][2], embedding_weights[0][3]]) - - def test_safe_embedding_lookup_sparse_no_weights(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, _ = self._ids_and_weights_2d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, None).eval()) - - self.assertAllClose( - embedding_lookup_result, - [(embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, - [0] * 4, embedding_weights[0][2], ( - embedding_weights[0][0] + embedding_weights[0][1]) / 2.0]) - - def test_safe_embedding_lookup_sparse_partitioned(self): - with self.test_session(): - embedding_weights = 
self._random_weights(num_shards=3) - sparse_ids, _ = self._ids_and_weights_2d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, None).eval()) - - embedding_weights = list(itertools.chain(*embedding_weights)) - self.assertAllClose(embedding_lookup_result, - [(embedding_weights[0] + embedding_weights[1]) / 2.0, - [0] * 4, [0] * 4, embedding_weights[2], - (embedding_weights[0] + embedding_weights[1]) / 2.0]) - - def test_safe_embedding_lookup_sparse_partitioned_inconsistent_weights(self): - with self.test_session(): - embedding_weights = self._random_weights(num_shards=3) - sparse_ids, sparse_weights = self._ids_and_weights_2d() - - embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, - embedding_weights, sparse_ids) - embedding_weights = [ - constant_op.constant(w, dtype=dtypes.float64) - for w in embedding_weights - ] - self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, - embedding_weights, sparse_ids, sparse_weights) - - def test_safe_embedding_lookup_sparse_3d_return_zero_vector(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, sparse_weights = self._ids_and_weights_3d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, sparse_weights).eval()) - - self.assertAllClose(embedding_lookup_result, [[ - (1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / 3.0, - [0] * 4, [0] * 4 - ], [embedding_weights[0][2], [0] * 4, [0] * 4]]) - - def test_safe_embedding_lookup_sparse_3d_return_special_vector(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, sparse_weights = self._ids_and_weights_3d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) - - 
self.assertAllClose( - embedding_lookup_result, - [[(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / - 3.0, embedding_weights[0][3], embedding_weights[0][3]], [ - embedding_weights[0][2], embedding_weights[0][3], - embedding_weights[0][3] - ]]) - - def test_safe_embedding_lookup_sparse_3d_no_weights(self): - with self.test_session(): - embedding_weights = self._random_weights() - sparse_ids, _ = self._ids_and_weights_3d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, None).eval()) - - self.assertAllClose(embedding_lookup_result, [[( - embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, [ - 0 - ] * 4], [ - embedding_weights[0][2], - (embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4 - ]]) - - def test_safe_embedding_lookup_sparse_3d_partitioned(self): - with self.test_session(): - embedding_weights = self._random_weights(num_shards=3) - sparse_ids, _ = self._ids_and_weights_3d() - - embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( - embedding_weights, sparse_ids, None).eval()) - - embedding_weights = list(itertools.chain(*embedding_weights)) - self.assertAllClose(embedding_lookup_result, [[ - (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4, [0] * 4 - ], [ - embedding_weights[2], - (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4 - ]]) - - def test_safe_embedding_lookup_sparse_3d_partitioned_inconsistent_weights( - self): - with self.test_session(): - embedding_weights = self._random_weights(num_shards=3) - sparse_ids, sparse_weights = self._ids_and_weights_3d() - - embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, - embedding_weights, sparse_ids) - embedding_weights = [ - constant_op.constant(w, dtype=dtypes.float64) - for w in embedding_weights - ] - self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, - 
embedding_weights, sparse_ids, sparse_weights) - - class ScatteredEmbeddingLookupTest(test.TestCase): def setUp(self): -- GitLab From 24a6350ad173865c16351825f251f2fde97b7d9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 26 Apr 2018 07:01:06 +0800 Subject: [PATCH 0010/1310] Revert "CLN: delete test cases in contrib" This reverts commit 91ad552a52242b3d382eee6a3382c79be36b7df7. --- .../python/layers/embedding_ops_test.py | 217 ++++++++++++++++++ 1 file changed, 217 insertions(+) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index f7b7ade39d..4d9849b4b1 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import itertools +import math import numpy as np @@ -38,6 +39,222 @@ from tensorflow.python.platform import test from tensorflow.python.util import compat +class SafeEmbeddingLookupSparseTest(test.TestCase): + + def _random_weights(self, vocab_size=4, embed_dim=4, num_shards=1): + assert vocab_size > 0 + assert embed_dim > 0 + assert num_shards > 0 + assert num_shards <= vocab_size + + embedding_weights = partitioned_variables.create_partitioned_variables( + shape=[vocab_size, embed_dim], + slicing=[num_shards, 1], + initializer=init_ops.truncated_normal_initializer( + mean=0.0, stddev=1.0 / math.sqrt(vocab_size), dtype=dtypes.float32)) + for w in embedding_weights: + w.initializer.run() + embedding_weights = [w.eval() for w in embedding_weights] + return embedding_weights + + def _ids_and_weights_2d(self): + # Each row demonstrates a test case: + # Row 0: multiple valid ids, 1 invalid id, weighted mean + # Row 1: all ids are invalid (leaving no valid ids after pruning) + # Row 2: no ids to begin with + # Row 3: single id + # Row 4: all ids have 
<=0 weight + indices = [[0, 0], [0, 1], [0, 2], [1, 0], [3, 0], [4, 0], [4, 1]] + ids = [0, 1, -1, -1, 2, 0, 1] + weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] + shape = [5, 4] + + sparse_ids = sparse_tensor_lib.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(ids, dtypes.int64), + constant_op.constant(shape, dtypes.int64)) + + sparse_weights = sparse_tensor_lib.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(weights, dtypes.float32), + constant_op.constant(shape, dtypes.int64)) + + return sparse_ids, sparse_weights + + def _ids_and_weights_3d(self): + # Each (2-D) index demonstrates a test case: + # Index 0, 0: multiple valid ids, 1 invalid id, weighted mean + # Index 0, 1: all ids are invalid (leaving no valid ids after pruning) + # Index 0, 2: no ids to begin with + # Index 1, 0: single id + # Index 1, 1: all ids have <=0 weight + # Index 1, 2: no ids to begin with + indices = [[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 1, 0], [1, 0, 0], [1, 1, 0], + [1, 1, 1]] + ids = [0, 1, -1, -1, 2, 0, 1] + weights = [1.0, 2.0, 1.0, 1.0, 3.0, 0.0, -0.5] + shape = [2, 3, 4] + + sparse_ids = sparse_tensor_lib.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(ids, dtypes.int64), + constant_op.constant(shape, dtypes.int64)) + + sparse_weights = sparse_tensor_lib.SparseTensor( + constant_op.constant(indices, dtypes.int64), + constant_op.constant(weights, dtypes.float32), + constant_op.constant(shape, dtypes.int64)) + + return sparse_ids, sparse_weights + + def test_safe_embedding_lookup_sparse_return_zero_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(1.0 * embedding_weights[0][0] + 2.0 * 
embedding_weights[0][1]) / + 3.0, [0] * 4, [0] * 4, embedding_weights[0][2], [0] * 4]) + + def test_safe_embedding_lookup_sparse_return_special_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / + 3.0, embedding_weights[0][3], embedding_weights[0][3], + embedding_weights[0][2], embedding_weights[0][3]]) + + def test_safe_embedding_lookup_sparse_no_weights(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, _ = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + self.assertAllClose( + embedding_lookup_result, + [(embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, + [0] * 4, embedding_weights[0][2], ( + embedding_weights[0][0] + embedding_weights[0][1]) / 2.0]) + + def test_safe_embedding_lookup_sparse_partitioned(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, _ = self._ids_and_weights_2d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + embedding_weights = list(itertools.chain(*embedding_weights)) + self.assertAllClose(embedding_lookup_result, + [(embedding_weights[0] + embedding_weights[1]) / 2.0, + [0] * 4, [0] * 4, embedding_weights[2], + (embedding_weights[0] + embedding_weights[1]) / 2.0]) + + def test_safe_embedding_lookup_sparse_partitioned_inconsistent_weights(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, sparse_weights = self._ids_and_weights_2d() + + 
embedding_weights[1] = embedding_weights[1].astype(np.float64) + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids) + embedding_weights = [ + constant_op.constant(w, dtype=dtypes.float64) + for w in embedding_weights + ] + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids, sparse_weights) + + def test_safe_embedding_lookup_sparse_3d_return_zero_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights).eval()) + + self.assertAllClose(embedding_lookup_result, [[ + (1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / 3.0, + [0] * 4, [0] * 4 + ], [embedding_weights[0][2], [0] * 4, [0] * 4]]) + + def test_safe_embedding_lookup_sparse_3d_return_special_vector(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, sparse_weights, default_id=3).eval()) + + self.assertAllClose( + embedding_lookup_result, + [[(1.0 * embedding_weights[0][0] + 2.0 * embedding_weights[0][1]) / + 3.0, embedding_weights[0][3], embedding_weights[0][3]], [ + embedding_weights[0][2], embedding_weights[0][3], + embedding_weights[0][3] + ]]) + + def test_safe_embedding_lookup_sparse_3d_no_weights(self): + with self.test_session(): + embedding_weights = self._random_weights() + sparse_ids, _ = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + self.assertAllClose(embedding_lookup_result, [[( + embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4, [ + 0 + ] * 4], [ + 
embedding_weights[0][2], + (embedding_weights[0][0] + embedding_weights[0][1]) / 2.0, [0] * 4 + ]]) + + def test_safe_embedding_lookup_sparse_3d_partitioned(self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, _ = self._ids_and_weights_3d() + + embedding_lookup_result = (embedding_ops.safe_embedding_lookup_sparse( + embedding_weights, sparse_ids, None).eval()) + + embedding_weights = list(itertools.chain(*embedding_weights)) + self.assertAllClose(embedding_lookup_result, [[ + (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4, [0] * 4 + ], [ + embedding_weights[2], + (embedding_weights[0] + embedding_weights[1]) / 2.0, [0] * 4 + ]]) + + def test_safe_embedding_lookup_sparse_3d_partitioned_inconsistent_weights( + self): + with self.test_session(): + embedding_weights = self._random_weights(num_shards=3) + sparse_ids, sparse_weights = self._ids_and_weights_3d() + + embedding_weights[1] = embedding_weights[1].astype(np.float64) + self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids) + embedding_weights = [ + constant_op.constant(w, dtype=dtypes.float64) + for w in embedding_weights + ] + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, + embedding_weights, sparse_ids, sparse_weights) + + class ScatteredEmbeddingLookupTest(test.TestCase): def setUp(self): -- GitLab From 1c1b4d47707a439c157b5dcf3755e391730a328c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 26 Apr 2018 07:01:21 +0800 Subject: [PATCH 0011/1310] Revert "CLN: delete duplicate codes" This reverts commit 067c85fb66345e61aee9428cd645cca786ed2bf4. 
--- .../layers/python/layers/embedding_ops.py | 117 ++++++++++++++++-- .../python/layers/embedding_ops_test.py | 4 +- 2 files changed, 110 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index 4353bf9c28..49c3faf3b7 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -19,12 +19,14 @@ from __future__ import print_function from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.contrib.framework.python.framework import tensor_util as contrib_tensor_util from tensorflow.contrib.layers.python.ops import sparse_feature_cross_op from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops @@ -98,15 +100,112 @@ def safe_embedding_lookup_sparse(embedding_weights, logging.warn("The default value of combiner will change from \"mean\" " "to \"sqrtn\" after 2016/11/01.") combiner = "mean" - return embedding_ops.safe_embedding_lookup_sparse( - embedding_weights=embedding_weights, - sparse_ids=sparse_ids, - sparse_weights=sparse_weights, - combiner=combiner, - default_id=default_id, - name=name, - partition_strategy=partition_strategy, - max_norm=max_norm) + if embedding_weights is None: + raise ValueError("Missing embedding_weights %s." % embedding_weights) + if isinstance(embedding_weights, variables.PartitionedVariable): + embedding_weights = list(embedding_weights) # get underlying Variables. 
+ if not isinstance(embedding_weights, list): + embedding_weights = [embedding_weights] + if len(embedding_weights) < 1: + raise ValueError("Missing embedding_weights %s." % embedding_weights) + + dtype = sparse_weights.dtype if sparse_weights is not None else None + if isinstance(embedding_weights, variables.PartitionedVariable): + embedding_weights = list(embedding_weights) + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] + + contrib_tensor_util.assert_same_float_dtype(embedding_weights + + [sparse_weights]) + + with ops.name_scope(name, "embedding_lookup", + embedding_weights + [sparse_ids, + sparse_weights]) as scope: + # Reshape higher-rank sparse ids and weights to linear segment ids. + original_shape = sparse_ids.dense_shape + original_rank_dim = sparse_ids.dense_shape.get_shape()[0] + original_rank = ( + array_ops.size(original_shape) + if original_rank_dim.value is None + else original_rank_dim.value) + sparse_ids = sparse_ops.sparse_reshape(sparse_ids, [ + math_ops.reduce_prod( + array_ops.slice(original_shape, [0], [original_rank - 1])), + array_ops.gather(original_shape, original_rank - 1)]) + if sparse_weights is not None: + sparse_weights = sparse_tensor.SparseTensor( + sparse_ids.indices, + sparse_weights.values, sparse_ids.dense_shape) + + # Prune invalid ids and weights. + sparse_ids, sparse_weights = _prune_invalid_ids(sparse_ids, sparse_weights) + if combiner != "sum": + sparse_ids, sparse_weights = _prune_invalid_weights( + sparse_ids, sparse_weights) + + # Fill in dummy values for empty features, if necessary. 
+ sparse_ids, is_row_empty = sparse_ops.sparse_fill_empty_rows(sparse_ids, + default_id or + 0) + if sparse_weights is not None: + sparse_weights, _ = sparse_ops.sparse_fill_empty_rows(sparse_weights, 1.0) + + result = embedding_ops.embedding_lookup_sparse( + embedding_weights, + sparse_ids, + sparse_weights, + combiner=combiner, + partition_strategy=partition_strategy, + name=None if default_id is None else scope, + max_norm=max_norm) + + if default_id is None: + # Broadcast is_row_empty to the same shape as embedding_lookup_result, + # for use in Select. + is_row_empty = array_ops.tile( + array_ops.reshape(is_row_empty, [-1, 1]), + array_ops.stack([1, array_ops.shape(result)[1]])) + + result = array_ops.where(is_row_empty, + array_ops.zeros_like(result), + result, + name=scope) + + # Reshape back from linear ids back into higher-dimensional dense result. + final_result = array_ops.reshape( + result, + array_ops.concat([ + array_ops.slice( + math_ops.cast(original_shape, dtypes.int32), [0], + [original_rank - 1]), + array_ops.slice(array_ops.shape(result), [1], [-1]) + ], 0)) + final_result.set_shape(tensor_shape.unknown_shape( + (original_rank_dim - 1).value).concatenate(result.get_shape()[1:])) + return final_result + + +def _prune_invalid_ids(sparse_ids, sparse_weights): + """Prune invalid IDs (< 0) from the input ids and weights.""" + is_id_valid = math_ops.greater_equal(sparse_ids.values, 0) + if sparse_weights is not None: + is_id_valid = math_ops.logical_and( + is_id_valid, + array_ops.ones_like(sparse_weights.values, dtype=dtypes.bool)) + sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_id_valid) + if sparse_weights is not None: + sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_id_valid) + return sparse_ids, sparse_weights + + +def _prune_invalid_weights(sparse_ids, sparse_weights): + """Prune invalid weights (< 0) from the input ids and weights.""" + if sparse_weights is not None: + is_weights_valid = 
math_ops.greater(sparse_weights.values, 0) + sparse_ids = sparse_ops.sparse_retain(sparse_ids, is_weights_valid) + sparse_weights = sparse_ops.sparse_retain(sparse_weights, is_weights_valid) + return sparse_ids, sparse_weights def scattered_embedding_lookup(params, diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py index 4d9849b4b1..87f00f94a6 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops_test.py @@ -168,7 +168,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, sparse_weights = self._ids_and_weights_2d() embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, embedding_weights, sparse_ids) embedding_weights = [ constant_op.constant(w, dtype=dtypes.float64) @@ -245,7 +245,7 @@ class SafeEmbeddingLookupSparseTest(test.TestCase): sparse_ids, sparse_weights = self._ids_and_weights_3d() embedding_weights[1] = embedding_weights[1].astype(np.float64) - self.assertRaises(TypeError, embedding_ops.safe_embedding_lookup_sparse, + self.assertRaises(ValueError, embedding_ops.safe_embedding_lookup_sparse, embedding_weights, sparse_ids) embedding_weights = [ constant_op.constant(w, dtype=dtypes.float64) -- GitLab From f2a0bc58db70cc792649672b81317288c4151ebb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 26 Apr 2018 20:52:03 +0800 Subject: [PATCH 0012/1310] BLD: update golden --- tensorflow/tools/api/golden/tensorflow.nn.pbtxt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt index 455590d866..d9e5b0d0fc 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.nn.pbtxt @@ -260,6 +260,10 @@ tf_module { name: "relu_layer" argspec: "args=[\'x\', \'weights\', \'biases\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "safe_embedding_lookup_sparse" + argspec: "args=[\'embedding_weights\', \'sparse_ids\', \'sparse_weights\', \'combiner\', \'default_id\', \'name\', \'partition_strategy\', \'max_norm\'], varargs=None, keywords=None, defaults=[\'None\', \'mean\', \'None\', \'None\', \'div\', \'None\'], " + } member_method { name: "sampled_softmax_loss" argspec: "args=[\'weights\', \'biases\', \'labels\', \'inputs\', \'num_sampled\', \'num_classes\', \'num_true\', \'sampled_values\', \'remove_accidental_hits\', \'partition_strategy\', \'name\', \'seed\'], varargs=None, keywords=None, defaults=[\'1\', \'None\', \'True\', \'mod\', \'sampled_softmax_loss\', \'None\'], " -- GitLab From cd2ba0c063ffd89f0310a6ab6482a5607e590cb1 Mon Sep 17 00:00:00 2001 From: Dan Osipov Date: Sun, 18 Mar 2018 18:50:34 -0700 Subject: [PATCH 0013/1310] Document additional argument --- tensorflow/python/ops/image_ops_impl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 8524c08f81..cee948fe43 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -984,6 +984,7 @@ def resize_image_aspect_with_pad(image, target_height, target_width, 3-D Tensor of shape `[height, width, channels]`. target_height: Target height. target_width: Target width. + method: Method to use for resizing image. See `resize_images()` Raises: ValueError: if `target_height` or `target_width` are zero or negative. 
-- GitLab From 96dc82647d0eb5d1903242c2dde1cf9dd5bb36f0 Mon Sep 17 00:00:00 2001 From: Dan Osipov Date: Sat, 28 Apr 2018 08:28:33 -0700 Subject: [PATCH 0014/1310] Rename API method --- tensorflow/python/ops/image_ops.py | 2 +- tensorflow/python/ops/image_ops_impl.py | 6 +++--- tensorflow/python/ops/image_ops_test.py | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/ops/image_ops.py b/tensorflow/python/ops/image_ops.py index f11b6dcea6..091ec61b1f 100644 --- a/tensorflow/python/ops/image_ops.py +++ b/tensorflow/python/ops/image_ops.py @@ -36,7 +36,7 @@ See the @{$python/image} guide. @@resize_bilinear @@resize_nearest_neighbor @@resize_image_with_crop_or_pad -@@resize_image_aspect_with_pad +@@resize_image_with_pad @@central_crop @@pad_to_bounding_box @@crop_to_bounding_box diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index cee948fe43..5fe0b7a251 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -969,8 +969,8 @@ def resize_images(images, return images -@tf_export('image.resize_image_aspect_with_pad') -def resize_image_aspect_with_pad(image, target_height, target_width, +@tf_export('image.resize_image_with_pad') +def resize_image_with_pad(image, target_height, target_width, method=ResizeMethod.BILINEAR): """ Resizes and pads an image to a target width and height. @@ -996,7 +996,7 @@ def resize_image_aspect_with_pad(image, target_height, target_width, If `images` was 3-D, a 3-D float Tensor of shape `[new_height, new_width, channels]`. 
""" - with ops.name_scope(None, 'resize_image_aspect_with_pad', [image]): + with ops.name_scope(None, 'resize_image_with_pad', [image]): image = ops.convert_to_tensor(image, name='image') image_shape = image.get_shape() is_batch = True diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 40a4d175ac..22d9ce4289 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -2458,9 +2458,9 @@ class ResizeImagesTest(test_util.TensorFlowTestCase): self.assertTrue(y.op.name.startswith("resize_images")) -class ResizeImageAspectWithPadTest(test_util.TensorFlowTestCase): +class ResizeImageWithPadTest(test_util.TensorFlowTestCase): - def _ResizeImageAspectWithPad(self, x, target_height, target_width, + def _ResizeImageWithPad(self, x, target_height, target_width, use_tensor_inputs): if use_tensor_inputs: target_height = ops.convert_to_tensor(target_height) @@ -2471,7 +2471,7 @@ class ResizeImageAspectWithPadTest(test_util.TensorFlowTestCase): x_tensor = x feed_dict = {} - y = image_ops.resize_image_aspect_with_pad(x_tensor, target_height, + y = image_ops.resize_image_with_pad(x_tensor, target_height, target_width) if not use_tensor_inputs: self.assertTrue(y.get_shape().is_fully_defined()) @@ -2491,7 +2491,7 @@ class ResizeImageAspectWithPadTest(test_util.TensorFlowTestCase): y = np.array(y).reshape(y_shape) for use_tensor_inputs in use_tensor_inputs_options: - y_tf = self._ResizeImageAspectWithPad(x, target_height, target_width, + y_tf = self._ResizeImageWithPad(x, target_height, target_width, use_tensor_inputs) self.assertAllClose(y, y_tf) @@ -2507,7 +2507,7 @@ class ResizeImageAspectWithPadTest(test_util.TensorFlowTestCase): for use_tensor_inputs in use_tensor_inputs_options: try: - self._ResizeImageAspectWithPad(x, target_height, target_width, + self._ResizeImageWithPad(x, target_height, target_width, use_tensor_inputs) except Exception as e: if err_msg not in str(e): @@ -2517,7 
+2517,7 @@ class ResizeImageAspectWithPadTest(test_util.TensorFlowTestCase): def _assertShapeInference(self, pre_shape, height, width, post_shape): image = array_ops.placeholder(dtypes.float32, shape=pre_shape) - y = image_ops.resize_image_aspect_with_pad(image, height, width) + y = image_ops.resize_image_with_pad(image, height, width) self.assertEqual(y.get_shape().as_list(), post_shape) def testNoOp(self): -- GitLab From 533cb5caa4c88d3f76e1994e8f039ea04d342482 Mon Sep 17 00:00:00 2001 From: Dan Osipov Date: Sat, 28 Apr 2018 08:30:56 -0700 Subject: [PATCH 0015/1310] Remove assertions --- tensorflow/python/ops/image_ops_impl.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 5fe0b7a251..e174feedb5 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1066,16 +1066,6 @@ def resize_image_with_pad(image, target_height, target_width, _, resized_height, resized_width, _ = _ImageDimensions(resized, rank=4) - assert_ops = [] - assert_ops += _assert( - equal_(resized_height, target_height), ValueError, - 'resized height is not correct.') - assert_ops += _assert( - equal_(resized_width, target_width), ValueError, - 'resized width is not correct.') - - resized = control_flow_ops.with_dependencies(assert_ops, resized) - if not is_batch: resized = array_ops.squeeze(resized, squeeze_dims=[0]) -- GitLab From 764ea231d9b649ad167fd1ffd4f4c5c4e79642c7 Mon Sep 17 00:00:00 2001 From: Dan Osipov Date: Sat, 28 Apr 2018 08:32:36 -0700 Subject: [PATCH 0016/1310] Update docstring --- tensorflow/python/ops/image_ops_impl.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index e174feedb5..d5ac72bac6 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -976,8 +976,9 @@ def 
resize_image_with_pad(image, target_height, target_width, Resizes and pads an image to a target width and height. Resizes an image to a target width and height by keeping - the aspect ratio the same without distortion and padding - it evenly with zeros. + the aspect ratio the same without distortion. If the target + dimensions don't match the image dimensions, the image + is padded with zeroes prior to resizing. Args: image: 4-D Tensor of shape `[batch, height, width, channels]` or -- GitLab From 74171d402a52074806bc5f0d1a3ddae92212214f Mon Sep 17 00:00:00 2001 From: Dan Osipov Date: Tue, 8 May 2018 14:24:32 -0700 Subject: [PATCH 0017/1310] Fix bad merge --- tensorflow/python/ops/image_ops.py | 61 ------------------------------ 1 file changed, 61 deletions(-) diff --git a/tensorflow/python/ops/image_ops.py b/tensorflow/python/ops/image_ops.py index 091ec61b1f..343531ac55 100644 --- a/tensorflow/python/ops/image_ops.py +++ b/tensorflow/python/ops/image_ops.py @@ -17,67 +17,6 @@ """Image processing and decoding ops. See the @{$python/image} guide. 
-<<<<<<< HEAD -======= - -@@decode_bmp -@@decode_gif -@@decode_jpeg -@@decode_and_crop_jpeg -@@encode_jpeg -@@extract_jpeg_shape -@@decode_png -@@encode_png -@@is_jpeg -@@decode_image -@@resize_images -@@resize_area -@@resize_bicubic -@@resize_bilinear -@@resize_nearest_neighbor -@@resize_image_with_crop_or_pad -@@resize_image_with_pad -@@central_crop -@@pad_to_bounding_box -@@crop_to_bounding_box -@@extract_glimpse -@@crop_and_resize -@@flip_up_down -@@random_flip_up_down -@@flip_left_right -@@random_flip_left_right -@@transpose_image -@@rot90 - -@@rgb_to_grayscale -@@grayscale_to_rgb -@@hsv_to_rgb -@@rgb_to_hsv -@@rgb_to_yiq -@@yiq_to_rgb -@@rgb_to_yuv -@@yuv_to_rgb -@@convert_image_dtype -@@adjust_brightness -@@random_brightness -@@adjust_contrast -@@random_contrast -@@adjust_hue -@@random_hue -@@adjust_gamma -@@adjust_saturation -@@random_saturation -@@per_image_standardization -@@draw_bounding_boxes -@@non_max_suppression -@@sample_distorted_bounding_box -@@total_variation -@@psnr -@@ssim -@@ssim_multiscale -@@image_gradients -@@sobel_edges ->>>>>>> 88687fa... 
Add resize_image_aspect_with_pad method """ from __future__ import absolute_import from __future__ import division -- GitLab From 5e6b20e53720e8d00619d851ce983f8da77c5cf4 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Tue, 8 May 2018 14:54:53 -0700 Subject: [PATCH 0018/1310] Deploy TensorFlow ecosystem jars --- tensorflow/java/maven/pom.xml | 10 +- tensorflow/java/maven/release.sh | 1 + tensorflow/java/maven/run_inside_container.sh | 42 ++++- .../pom-spark.xml.template | 19 +++ .../spark-tensorflow-connector/update.py | 152 ++++++++++++++++++ .../tensorflow-hadoop/pom-hadoop.xml.template | 18 +++ .../java/maven/tensorflow-hadoop/update.py | 114 +++++++++++++ 7 files changed, 352 insertions(+), 4 deletions(-) create mode 100644 tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template create mode 100644 tensorflow/java/maven/spark-tensorflow-connector/update.py create mode 100644 tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template create mode 100644 tensorflow/java/maven/tensorflow-hadoop/update.py diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 0a09a5ea7c..21fed5a419 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.8.0 + 1.8.0-SNAPSHOT pom https://www.tensorflow.org @@ -32,6 +32,8 @@ libtensorflow_jni_gpu tensorflow proto + tensorflow-hadoop + spark-tensorflow-connector ossrh - https://oss.sonatype.org/content/repositories/snapshots + https://tap.jfrog.io/tap/public-snapshots + ossrh @@ -74,6 +77,7 @@ + diff --git a/tensorflow/java/maven/release.sh b/tensorflow/java/maven/release.sh index 9012ea14ea..6c51029198 100755 --- a/tensorflow/java/maven/release.sh +++ b/tensorflow/java/maven/release.sh @@ -48,6 +48,7 @@ fi set -ex docker run \ + $DOCKER_PROXY_RUN_ARGS \ -e TF_VERSION="${TF_VERSION}" \ -e DEPLOY_OSSRH="${DEPLOY_OSSRH:-true}" \ -e DEPLOY_BINTRAY="${DEPLOY_BINTRAY:-true}" \ diff --git 
a/tensorflow/java/maven/run_inside_container.sh b/tensorflow/java/maven/run_inside_container.sh index 6136ccfdfb..73f7ee94a0 100644 --- a/tensorflow/java/maven/run_inside_container.sh +++ b/tensorflow/java/maven/run_inside_container.sh @@ -32,11 +32,15 @@ if [[ "${TF_VERSION}" == *"-SNAPSHOT" ]]; then DEPLOY_BINTRAY="false" fi PROTOC_RELEASE_URL="https://github.com/google/protobuf/releases/download/v3.3.0/protoc-3.3.0-linux-x86_64.zip" +TF_ECOSYSTEM_URL="https://github.com/tensorflow/ecosystem.git" + if [[ "${DEPLOY_BINTRAY}" != "true" && "${DEPLOY_OSSRH}" != "true" ]]; then echo "Must deploy to at least one of Bintray or OSSRH" >&2 exit 2 fi +IS_SNAPSHOT="true" + set -ex clean() { @@ -183,6 +187,41 @@ generate_java_protos() { rm -rf "${DIR}/proto/tmp" } + +download_tf_ecosystem() { + ECOSYSTEM_DIR="/tmp/tensorflow-ecosystem" + HADOOP_DIR="${DIR}/tensorflow-hadoop" + SPARK_DIR="${DIR}/spark-tensorflow-connector" + + # Clean any previous attempts + rm -rf "${ECOSYSTEM_DIR}" + + # Clone the TensorFlow ecosystem project + mkdir -p "${ECOSYSTEM_DIR}" + cd "${ECOSYSTEM_DIR}" + git clone "${TF_ECOSYSTEM_URL}" + + # Copy the TensorFlow Hadoop source + cp -r "${ECOSYSTEM_DIR}/ecosystem/hadoop/src" "${HADOOP_DIR}" + python ${HADOOP_DIR}/update.py --template ${HADOOP_DIR}/pom-hadoop.xml.template \ + --input_pom ${ECOSYSTEM_DIR}/ecosystem/hadoop/pom.xml \ + --output_pom ${HADOOP_DIR}/pom.xml \ + --version ${TF_VERSION} + + # Copy the TensorFlow Spark connector source + cp -r "${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/src" "${SPARK_DIR}" + python ${SPARK_DIR}/update.py --template ${SPARK_DIR}/pom-spark.xml.template \ + --input_pom ${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/pom.xml \ + --output_pom ${SPARK_DIR}/pom.xml \ + --version ${TF_VERSION} \ + --scala_version 2.11 + + # Cleanup + rm -rf "${ECOSYSTEM_DIR}" + + cd "${DIR}" +} + # Deploy artifacts using a specific profile. # Arguments: # profile - name of selected profile. 
@@ -240,7 +279,7 @@ cd "${DIR}" # Comment lines out appropriately if debugging/tinkering with the release # process. # gnupg2 is required for signing -apt-get -qq update && apt-get -qqq install -y gnupg2 +apt-get -qq update && apt-get -qqq install -y gnupg2 && apt-get -qqq install -y git clean update_version_in_pom download_libtensorflow @@ -248,6 +287,7 @@ download_libtensorflow_jni download_libtensorflow_jni_gpu update_tensorflow_android generate_java_protos +download_tf_ecosystem # Build the release artifacts mvn verify # Push artifacts to repository diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template b/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template new file mode 100644 index 0000000000..d8a3d559be --- /dev/null +++ b/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template @@ -0,0 +1,19 @@ + + 4.0.0 + TensorFlow TFRecord connector for Apache Spark DataFrames + spark-tensorflow-connector_${scala_version} + ${version} + jar + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + ${version} + ../ + + + diff --git a/tensorflow/java/maven/spark-tensorflow-connector/update.py b/tensorflow/java/maven/spark-tensorflow-connector/update.py new file mode 100644 index 0000000000..6185ccbb00 --- /dev/null +++ b/tensorflow/java/maven/spark-tensorflow-connector/update.py @@ -0,0 +1,152 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Merge TensorFlow Spark connector pom from with deployment template. + +The TensorFlow Spark connector pom is here: https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import string +import xml.etree.ElementTree as ET + +POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" +SCALA_VERSION_TAG = "scala.binary.version" + + +def get_args(): + """Parse command line args.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--version', + required=True, + help='Version for the artifact.') + parser.add_argument( + '--scala_version', + required=True, + choices=['2.10', '2.11'], + help='Scala version for the artifact.') + parser.add_argument( + '--template', + required=True, + help='Path to the pom file template.') + parser.add_argument( + '--input_pom', + required=True, + help='Path to input pom file to merge with template.') + parser.add_argument( + '--output_pom', + required=True, + help='Path to output pom file.') + return parser.parse_args() + + +def load_pom(input_path): + """ Loads POM file to XML tree""" + ET.register_namespace("", POM_NAMESPACE) + tree = ET.parse(input_path) + return tree + + +def update_scala_version(tree, version, is_template=False): + """ Updates scala version in XML tree""" + + if is_template: + tag = "{%s}artifactId" % POM_NAMESPACE + nodes = tree.findall(tag) + + if nodes is None: + raise ValueError("Missing artifactId in template pom") + + for node in nodes: + template = string.Template(node.text) + + text = template.substitute({"scala_version": version}) + node.text = text + else: + # Update scala version property in pom + tag = "{%s}%s" % (POM_NAMESPACE, SCALA_VERSION_TAG) + nodes = nodes = list(tree.iter(tag)) + + if len(nodes) == 0: + 
raise ValueError("Missing %s property in Spark connector pom") + + for node in nodes: + node.text = version + + return tree + + +def update_version(tree, version): + """ Updates version tags in XML tree """ + version_tag = "{%s}version" % POM_NAMESPACE + nodes = list(tree.iter(version_tag)) + + if len(nodes) == 0: + raise ValueError("Missing version in template pom") + + for node in nodes: + node.text = version + + return tree + + +def merge_tags(template_root, pom_root): + """ Merge pom file from TensorFlow Spark connector with deployment template. + + Modify the TensorFlow Spark connector pom to inherit parent pom and version info and + other tags provided by deployment template. + + TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed + for propagating the deployment profile. + + Args: + template_root: Root XML element for template file. + pom_root: Root XML element for TensorFlow Spark connector pom file. + + Return: + template_root: Root XML element with merged tree. 
+ """ + template_tags = [child.tag for child in template_root] + template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent + + for child in pom_root: + if child.tag not in template_tags: + template_root.append(child) + + return template_root + + +def main(): + args = get_args() + template_tree = load_pom(args.template) + pom_tree = load_pom(args.input_pom) + + template_tree = update_version(template_tree, args.version) + template_tree = update_scala_version(template_tree, args.scala_version, is_template=True) + pom_tree = update_scala_version(pom_tree, args.scala_version, is_template=False) + template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) + + with open(args.output_pom, "w") as f: + f.write(ET.tostring(template_root)) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template b/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template new file mode 100644 index 0000000000..6a82c56cc7 --- /dev/null +++ b/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template @@ -0,0 +1,18 @@ + + 4.0.0 + TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop + tensorflow-hadoop + ${version} + jar + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + ${version} + ../ + + diff --git a/tensorflow/java/maven/tensorflow-hadoop/update.py b/tensorflow/java/maven/tensorflow-hadoop/update.py new file mode 100644 index 0000000000..503062608d --- /dev/null +++ b/tensorflow/java/maven/tensorflow-hadoop/update.py @@ -0,0 +1,114 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Merge TensorFlow Hadoop pom from with deployment template. + +The TensorFlow Hadoop pom is here: https://github.com/tensorflow/ecosystem/tree/master/hadoop +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys +import xml.etree.ElementTree as ET + +POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" + + +def get_args(): + """Parse command line args.""" + parser = argparse.ArgumentParser() + parser.add_argument( + '--version', + required=True, + help='Version for the artifact.') + parser.add_argument( + '--template', + required=True, + help='Path to the pom file template.') + parser.add_argument( + '--input_pom', + required=True, + help='Path to input pom file to merge with template.') + parser.add_argument( + '--output_pom', + required=True, + help='Path to output pom file.') + return parser.parse_args() + + +def load_pom(input_path): + """ Loads POM file to XML tree""" + ET.register_namespace("", POM_NAMESPACE) + tree = ET.parse(input_path) + return tree + + +def update_version(tree, version): + """ Updates version tags in XML tree """ + version_tag = "{%s}version" % POM_NAMESPACE + nodes = list(tree.iter(version_tag)) + + if len(nodes) == 0: + raise ValueError("Missing version in template pom") + + for node in nodes: + node.text = version + + return tree + + +def merge_tags(template_root, pom_root): + """ Merge pom file from TensorFlow Hadoop with deployment template. 
+ + Modify the TensorFlow Hadoop pom to inherit parent pom and version info and + other tags provided by deployment template. + + TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed + for propagating the deployment profile. + + Args: + template_root: Root XML element for template file. + pom_root: Root XML element for TensorFlow Hadoop pom file. + + Return: + template_root: Root XML element with merged tree. + """ + template_tags = [child.tag for child in template_root] + template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent + + for child in pom_root: + if child.tag not in template_tags: + template_root.append(child) + + return template_root + + +def main(): + args = get_args() + template_tree = load_pom(args.template) + pom_tree = load_pom(args.input_pom) + + template_tree = update_version(template_tree, args.version) + template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) + + with open(args.output_pom, "w") as f: + f.write(ET.tostring(template_root)) + + +if __name__ == '__main__': + sys.exit(main()) -- GitLab From f957cfbc4d27a57bf08d128b41042a16f1155ab0 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Tue, 8 May 2018 18:40:20 -0700 Subject: [PATCH 0019/1310] Add TensorFlow ecosystem Spark and Hadoop jars to Maven deployment --- tensorflow/java/maven/README.md | 6 +++++ tensorflow/java/maven/pom.xml | 8 +++--- tensorflow/java/maven/release.sh | 1 - tensorflow/java/maven/run_inside_container.sh | 26 ++++++++++--------- .../maven/spark-tensorflow-connector/pom.xml | 24 +++++++++++++++++ .../java/maven/tensorflow-hadoop/pom.xml | 24 +++++++++++++++++ 6 files changed, 71 insertions(+), 18 deletions(-) create mode 100644 tensorflow/java/maven/spark-tensorflow-connector/pom.xml create mode 100644 tensorflow/java/maven/tensorflow-hadoop/pom.xml diff --git a/tensorflow/java/maven/README.md b/tensorflow/java/maven/README.md index c7e8f03806..fa756815a9 100644 --- 
a/tensorflow/java/maven/README.md +++ b/tensorflow/java/maven/README.md @@ -53,6 +53,12 @@ There are seven artifacts and thus `pom.xml`s involved in this release: 7. [`parentpom`](https://maven.apache.org/pom/index.html): Common settings shared by all of the above. +8. `tensorflow-hadoop`: The TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop. + The source code for this package is available in the [TensorFlow Ecosystem](https://github.com/tensorflow/ecosystem/tree/master/hadoop) + +9. `spark-tensorflow-connector`: A Scala library for loading and storing TensorFlow TFRecord + using Apache Spark DataFrames. The source code for this package is available + in the [TensorFlow Ecosystem](https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector) ## Updating the release diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 21fed5a419..7a95fb2556 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.8.0-SNAPSHOT + 1.8.0 pom https://www.tensorflow.org @@ -46,8 +46,7 @@ ossrh - https://tap.jfrog.io/tap/public-snapshots - + https://oss.sonatype.org/content/repositories/snapshots ossrh @@ -77,7 +76,6 @@ - + diff --git a/tensorflow/java/maven/release.sh b/tensorflow/java/maven/release.sh index 6c51029198..9012ea14ea 100755 --- a/tensorflow/java/maven/release.sh +++ b/tensorflow/java/maven/release.sh @@ -48,7 +48,6 @@ fi set -ex docker run \ - $DOCKER_PROXY_RUN_ARGS \ -e TF_VERSION="${TF_VERSION}" \ -e DEPLOY_OSSRH="${DEPLOY_OSSRH:-true}" \ -e DEPLOY_BINTRAY="${DEPLOY_BINTRAY:-true}" \ diff --git a/tensorflow/java/maven/run_inside_container.sh b/tensorflow/java/maven/run_inside_container.sh index 73f7ee94a0..3808104bc1 100644 --- a/tensorflow/java/maven/run_inside_container.sh +++ b/tensorflow/java/maven/run_inside_container.sh @@ -39,8 +39,6 @@ if [[ "${DEPLOY_BINTRAY}" != "true" && "${DEPLOY_OSSRH}" != "true" ]]; then exit 2 fi 
-IS_SNAPSHOT="true" - set -ex clean() { @@ -48,7 +46,9 @@ clean() { # (though if run inside a clean docker container, there won't be any dirty # artifacts lying around) mvn -q clean - rm -rf libtensorflow_jni/src libtensorflow_jni/target libtensorflow_jni_gpu/src libtensorflow_jni_gpu/target libtensorflow/src libtensorflow/target tensorflow-android/target + rm -rf libtensorflow_jni/src libtensorflow_jni/target libtensorflow_jni_gpu/src libtensorflow_jni_gpu/target \ + libtensorflow/src libtensorflow/target tensorflow-android/target \ + tensorflow-hadoop/src spark-tensorflow-connector/src } update_version_in_pom() { @@ -188,6 +188,9 @@ generate_java_protos() { } +# Download the TensorFlow ecosystem source from git. +# The pom files from this repo do not inherit from the parent pom so the maven version +# is updated for each module. download_tf_ecosystem() { ECOSYSTEM_DIR="/tmp/tensorflow-ecosystem" HADOOP_DIR="${DIR}/tensorflow-hadoop" @@ -203,18 +206,15 @@ download_tf_ecosystem() { # Copy the TensorFlow Hadoop source cp -r "${ECOSYSTEM_DIR}/ecosystem/hadoop/src" "${HADOOP_DIR}" - python ${HADOOP_DIR}/update.py --template ${HADOOP_DIR}/pom-hadoop.xml.template \ - --input_pom ${ECOSYSTEM_DIR}/ecosystem/hadoop/pom.xml \ - --output_pom ${HADOOP_DIR}/pom.xml \ - --version ${TF_VERSION} + cp "${ECOSYSTEM_DIR}/ecosystem/hadoop/pom.xml" "${HADOOP_DIR}" + cd "${HADOOP_DIR}" + update_version_in_pom # Copy the TensorFlow Spark connector source cp -r "${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/src" "${SPARK_DIR}" - python ${SPARK_DIR}/update.py --template ${SPARK_DIR}/pom-spark.xml.template \ - --input_pom ${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/pom.xml \ - --output_pom ${SPARK_DIR}/pom.xml \ - --version ${TF_VERSION} \ - --scala_version 2.11 + cp "${ECOSYSTEM_DIR}/ecosystem/spark/spark-tensorflow-connector/pom.xml" "${SPARK_DIR}" + cd "${SPARK_DIR}" + update_version_in_pom # Cleanup rm -rf "${ECOSYSTEM_DIR}" @@ -280,6 +280,7 @@ cd "${DIR}" 
# process. # gnupg2 is required for signing apt-get -qq update && apt-get -qqq install -y gnupg2 && apt-get -qqq install -y git + clean update_version_in_pom download_libtensorflow @@ -288,6 +289,7 @@ download_libtensorflow_jni_gpu update_tensorflow_android generate_java_protos download_tf_ecosystem + # Build the release artifacts mvn verify # Push artifacts to repository diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml new file mode 100644 index 0000000000..8c962d111f --- /dev/null +++ b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml @@ -0,0 +1,24 @@ + + + 4.0.0 + TensorFlow TFRecord connector for Apache Spark DataFrames + spark-tensorflow-connector + jar + + + https://github.com/tensorflow/ecosystem.git + git@github.com:tensorflow/ecosystem.git + scm:git:https://github.com/tensorflow/ecosystem.git + + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + 1.8.0 + ../ + + diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/tensorflow-hadoop/pom.xml new file mode 100644 index 0000000000..ee90d8c92b --- /dev/null +++ b/tensorflow/java/maven/tensorflow-hadoop/pom.xml @@ -0,0 +1,24 @@ + + + 4.0.0 + TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop + tensorflow-hadoop + jar + + + https://github.com/tensorflow/ecosystem.git + git@github.com:tensorflow/ecosystem.git + scm:git:https://github.com/tensorflow/ecosystem.git + + + https://github.com/tensorflow/ecosystem/ + + org.tensorflow + parentpom + 1.8.0 + ../ + + -- GitLab From 90b01f238d83d833ce9a843845dd96bb816a6c76 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Tue, 8 May 2018 18:46:35 -0700 Subject: [PATCH 0020/1310] Delete templating approach for deploying TensorFlow ecosystem jars --- .../pom-spark.xml.template | 19 --- .../spark-tensorflow-connector/update.py | 152 ------------------ .../tensorflow-hadoop/pom-hadoop.xml.template | 18 --- 
.../java/maven/tensorflow-hadoop/update.py | 114 ------------- 4 files changed, 303 deletions(-) delete mode 100644 tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template delete mode 100644 tensorflow/java/maven/spark-tensorflow-connector/update.py delete mode 100644 tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template delete mode 100644 tensorflow/java/maven/tensorflow-hadoop/update.py diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template b/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template deleted file mode 100644 index d8a3d559be..0000000000 --- a/tensorflow/java/maven/spark-tensorflow-connector/pom-spark.xml.template +++ /dev/null @@ -1,19 +0,0 @@ - - 4.0.0 - TensorFlow TFRecord connector for Apache Spark DataFrames - spark-tensorflow-connector_${scala_version} - ${version} - jar - - https://github.com/tensorflow/ecosystem/ - - org.tensorflow - parentpom - ${version} - ../ - - - diff --git a/tensorflow/java/maven/spark-tensorflow-connector/update.py b/tensorflow/java/maven/spark-tensorflow-connector/update.py deleted file mode 100644 index 6185ccbb00..0000000000 --- a/tensorflow/java/maven/spark-tensorflow-connector/update.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Merge TensorFlow Spark connector pom from with deployment template. 
- -The TensorFlow Spark connector pom is here: https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -import string -import xml.etree.ElementTree as ET - -POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" -SCALA_VERSION_TAG = "scala.binary.version" - - -def get_args(): - """Parse command line args.""" - parser = argparse.ArgumentParser() - parser.add_argument( - '--version', - required=True, - help='Version for the artifact.') - parser.add_argument( - '--scala_version', - required=True, - choices=['2.10', '2.11'], - help='Scala version for the artifact.') - parser.add_argument( - '--template', - required=True, - help='Path to the pom file template.') - parser.add_argument( - '--input_pom', - required=True, - help='Path to input pom file to merge with template.') - parser.add_argument( - '--output_pom', - required=True, - help='Path to output pom file.') - return parser.parse_args() - - -def load_pom(input_path): - """ Loads POM file to XML tree""" - ET.register_namespace("", POM_NAMESPACE) - tree = ET.parse(input_path) - return tree - - -def update_scala_version(tree, version, is_template=False): - """ Updates scala version in XML tree""" - - if is_template: - tag = "{%s}artifactId" % POM_NAMESPACE - nodes = tree.findall(tag) - - if nodes is None: - raise ValueError("Missing artifactId in template pom") - - for node in nodes: - template = string.Template(node.text) - - text = template.substitute({"scala_version": version}) - node.text = text - else: - # Update scala version property in pom - tag = "{%s}%s" % (POM_NAMESPACE, SCALA_VERSION_TAG) - nodes = nodes = list(tree.iter(tag)) - - if len(nodes) == 0: - raise ValueError("Missing %s property in Spark connector pom") - - for node in nodes: - node.text = version - - return tree - - -def update_version(tree, version): - """ Updates 
version tags in XML tree """ - version_tag = "{%s}version" % POM_NAMESPACE - nodes = list(tree.iter(version_tag)) - - if len(nodes) == 0: - raise ValueError("Missing version in template pom") - - for node in nodes: - node.text = version - - return tree - - -def merge_tags(template_root, pom_root): - """ Merge pom file from TensorFlow Spark connector with deployment template. - - Modify the TensorFlow Spark connector pom to inherit parent pom and version info and - other tags provided by deployment template. - - TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed - for propagating the deployment profile. - - Args: - template_root: Root XML element for template file. - pom_root: Root XML element for TensorFlow Spark connector pom file. - - Return: - template_root: Root XML element with merged tree. - """ - template_tags = [child.tag for child in template_root] - template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent - - for child in pom_root: - if child.tag not in template_tags: - template_root.append(child) - - return template_root - - -def main(): - args = get_args() - template_tree = load_pom(args.template) - pom_tree = load_pom(args.input_pom) - - template_tree = update_version(template_tree, args.version) - template_tree = update_scala_version(template_tree, args.scala_version, is_template=True) - pom_tree = update_scala_version(pom_tree, args.scala_version, is_template=False) - template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) - - with open(args.output_pom, "w") as f: - f.write(ET.tostring(template_root)) - - -if __name__ == '__main__': - sys.exit(main()) diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template b/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template deleted file mode 100644 index 6a82c56cc7..0000000000 --- a/tensorflow/java/maven/tensorflow-hadoop/pom-hadoop.xml.template +++ /dev/null @@ -1,18 +0,0 @@ - - 4.0.0 - 
TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop - tensorflow-hadoop - ${version} - jar - - https://github.com/tensorflow/ecosystem/ - - org.tensorflow - parentpom - ${version} - ../ - - diff --git a/tensorflow/java/maven/tensorflow-hadoop/update.py b/tensorflow/java/maven/tensorflow-hadoop/update.py deleted file mode 100644 index 503062608d..0000000000 --- a/tensorflow/java/maven/tensorflow-hadoop/update.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Merge TensorFlow Hadoop pom from with deployment template. 
- -The TensorFlow Hadoop pom is here: https://github.com/tensorflow/ecosystem/tree/master/hadoop -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys -import xml.etree.ElementTree as ET - -POM_NAMESPACE = "http://maven.apache.org/POM/4.0.0" - - -def get_args(): - """Parse command line args.""" - parser = argparse.ArgumentParser() - parser.add_argument( - '--version', - required=True, - help='Version for the artifact.') - parser.add_argument( - '--template', - required=True, - help='Path to the pom file template.') - parser.add_argument( - '--input_pom', - required=True, - help='Path to input pom file to merge with template.') - parser.add_argument( - '--output_pom', - required=True, - help='Path to output pom file.') - return parser.parse_args() - - -def load_pom(input_path): - """ Loads POM file to XML tree""" - ET.register_namespace("", POM_NAMESPACE) - tree = ET.parse(input_path) - return tree - - -def update_version(tree, version): - """ Updates version tags in XML tree """ - version_tag = "{%s}version" % POM_NAMESPACE - nodes = list(tree.iter(version_tag)) - - if len(nodes) == 0: - raise ValueError("Missing version in template pom") - - for node in nodes: - node.text = version - - return tree - - -def merge_tags(template_root, pom_root): - """ Merge pom file from TensorFlow Hadoop with deployment template. - - Modify the TensorFlow Hadoop pom to inherit parent pom and version info and - other tags provided by deployment template. - - TODO: Figure out if there is a cleaner way of doing this. Inheritance is needed - for propagating the deployment profile. - - Args: - template_root: Root XML element for template file. - pom_root: Root XML element for TensorFlow Hadoop pom file. - - Return: - template_root: Root XML element with merged tree. 
- """ - template_tags = [child.tag for child in template_root] - template_tags.append("{%s}groupId" % POM_NAMESPACE) # skip groupId since it is inherited from parent - - for child in pom_root: - if child.tag not in template_tags: - template_root.append(child) - - return template_root - - -def main(): - args = get_args() - template_tree = load_pom(args.template) - pom_tree = load_pom(args.input_pom) - - template_tree = update_version(template_tree, args.version) - template_root = merge_tags(template_tree.getroot(), pom_tree.getroot()) - - with open(args.output_pom, "w") as f: - f.write(ET.tostring(template_root)) - - -if __name__ == '__main__': - sys.exit(main()) -- GitLab From 78da41f8f16871cd1328218cbabcfc82dbecf8a3 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 9 May 2018 14:12:54 -0700 Subject: [PATCH 0021/1310] Subgraph to graphdef --- .../contrib/tensorrt/convert/convert_nodes.cc | 60 +++++++++++++++++++ .../contrib/tensorrt/convert/convert_nodes.h | 4 ++ 2 files changed, 64 insertions(+) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 3767596f8c..9b9ce51097 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -53,8 +53,11 @@ limitations under the License. 
namespace tensorflow { namespace tensorrt { namespace convert { +using ::tensorflow::str_util::Split; + using ::tensorflow::strings::StrAppend; using ::tensorflow::strings::StrCat; + namespace { inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, @@ -2723,6 +2726,63 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( return tensorflow::Status::OK(); } +// This needs to be called before TensorRT nodes inserted in order to correctly +// get sizes from the original graph +tensorflow::Status ConvertSegmentToGraphDef( + tensorflow::tensorrt::convert::SubGraphParams& params, + tensorflow::GraphDef* segment_def, + std::unordered_map *input_placeholder_map + ) { + //std::unordered_map input_placeholder_map; + for (size_t i = 0; i < params.input_inds.size(); ++i) { + auto& inputs = params.input_inds.at(i); + auto input_node = params.graph.FindNodeId(inputs.first); + if (input_node) { + tensorflow::DataType input_type = tensorflow::DT_FLOAT; + tensorflow::PartialTensorShape partial_shape; + + if (params.graph_properties.HasOutputProperties(input_node->name())) { + auto output_params = + params.graph_properties.GetOutputProperties(input_node->name()); + auto out_shape = output_params.at(inputs.second); + input_type = out_shape.dtype(); + std::vector dims; + for (const auto d : out_shape.shape().dim()) { + dims.push_back(d.size()); + } + tensorflow::PartialTensorShape::MakePartialShape( + dims.data(), dims.size(), &partial_shape); + } + tensorflow::NodeDef dummy_placeholder; + string node_name("InputPH_"); + StrAppend(&node_name, i); + input_placeholder_map->insert({input_node->name(),node_name}); + tensorflow::NodeDefBuilder dph_builder(node_name, "Placeholder"); + auto status = dph_builder.Attr("shape", partial_shape) + .Attr("dtype", input_type) + .Finalize(&dummy_placeholder); + auto seg_node = segment_def->add_node(); + seg_node->CopyFrom(dummy_placeholder); + } + } + for (const auto node_id : params.subgraph_node_ids) { + const auto node = 
params.graph.FindNodeId(node_id); + if (node) { + auto snode = segment_def->add_node(); + snode->CopyFrom(node->def()); + // check node inputs to see if it was connected to input node and update + // it to point to placeholder if necessary + for (int i = 0; i < snode->input_size(); ++i) { + auto node_input = Split(snode->input(i), ":"); + string node_input_name = node_input[0]; + auto it = input_placeholder_map->find(node_input_name); + if (it != input_placeholder_map->end()) { + snode->set_input(i, it->second); + } + } + } + } +} } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 3f6592cd25..903867fa7f 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -85,6 +85,10 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params); tensorflow::Status InjectCalibrationNode(SubGraphParams& params); tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph, tensorflow::Node* c_node); +tensorflow::Status ConvertSegmentToGraphDef( + tensorflow::tensorrt::convert::SubGraphParams& params, + tensorflow::GraphDef* segment_def, + std::unordered_map input_placeholder_map); } // namespace convert } // namespace tensorrt } // namespace tensorflow -- GitLab From b7c333dc75041b05ef4b0023db5dbbda4a817283 Mon Sep 17 00:00:00 2001 From: Dan Osipov Date: Wed, 16 May 2018 16:42:47 -0700 Subject: [PATCH 0022/1310] Resize first, pad second --- tensorflow/python/ops/image_ops_impl.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index d5ac72bac6..a070a4699f 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1048,19 +1048,19 @@ def resize_image_with_pad(image, 
target_height, target_width, # Find the ratio by which the image must be adjusted # to fit within the target ratio = max_(f_width / f_target_width, f_height / f_target_height) - p_height_float = max_((f_target_height - (f_height / ratio)) * ratio / 2, 0) - p_width_float = max_((f_target_width - (f_width / ratio)) * ratio / 2, 0) - p_height = math_ops.cast(math_ops.ceil(p_height_float), dtype=dtypes.int32) - p_width = math_ops.cast(math_ops.ceil(p_width_float), dtype=dtypes.int32) + resized_height_float = f_height / ratio + resized_width_float = f_width / ratio + resized_height = math_ops.cast(math_ops.floor(p_height_float), dtype=dtypes.int32) + resized_width = math_ops.cast(math_ops.floor(p_width_float), dtype=dtypes.int32) - padded_height = height + (p_height * 2) - padded_width = width + (p_width * 2) + p_height = target_height - resized_height + p_weight = target_width - resized_width - # Pad first, then resize to meet requested dimensions + # Resize first, then pad to meet requested dimensions + resized = resize_images(image, [resized_height, resized_width], method) + padded = pad_to_bounding_box(image, p_height, p_width, - padded_height, padded_width) - - resized = resize_images(padded, [target_height, target_width], method) + target_height, target_width) if resized.get_shape().ndims is None: raise ValueError('resized contains no shape.') -- GitLab From 05e4d01dd5db5125969b29405bbf9c4eea4a0cd3 Mon Sep 17 00:00:00 2001 From: nrstott Date: Fri, 18 May 2018 11:15:21 -0400 Subject: [PATCH 0023/1310] accept pd.DataFrame as y for pandas_input_fn --- .../python/estimator/inputs/pandas_io.py | 15 ++++++++--- .../python/estimator/inputs/pandas_io_test.py | 25 +++++++++++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index bd06843021..abf3f33519 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ 
b/tensorflow/python/estimator/inputs/pandas_io.py @@ -50,7 +50,7 @@ def pandas_input_fn(x, Args: x: pandas `DataFrame` object. - y: pandas `Series` object. `None` if absent. + y: pandas `Series` object or `DataFrame`. `None` if absent. batch_size: int, size of batches to return. num_epochs: int, number of epochs to iterate over data. If not `None`, read attempts that would exceed this value will raise `OutOfRangeError`. @@ -87,7 +87,13 @@ def pandas_input_fn(x, if not np.array_equal(x.index, y.index): raise ValueError('Index for x and y are mismatched.\nIndex for x: %s\n' 'Index for y: %s\n' % (x.index, y.index)) - x[target_column] = y + if isinstance(y, pd.DataFrame): + target_column = list(y) + print(target_column) + x[target_column] = y + print(x) + else: + x[target_column] = y # TODO(mdan): These are memory copies. We probably don't need 4x slack space. # The sizes below are consistent with what I've seen elsewhere. @@ -117,7 +123,10 @@ def pandas_input_fn(x, features = features[1:] features = dict(zip(list(x.columns), features)) if y is not None: - target = features.pop(target_column) + if isinstance(target_column, list): + target = {column: features.pop(column) for column in target_column} + else: + target = features.pop(target_column) return features, target return features return input_fn diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py index e5912a3b28..f4970f07b3 100644 --- a/tensorflow/python/estimator/inputs/pandas_io_test.py +++ b/tensorflow/python/estimator/inputs/pandas_io_test.py @@ -47,6 +47,16 @@ class PandasIoTest(test.TestCase): y = pd.Series(np.arange(-32, -28), index=index) return x, y + def makeTestDataFrameWithYAsDataFrame(self): + index = np.arange(100, 104) + a = np.arange(4) + b = np.arange(32, 36) + a_label = np.arange(10, 14) + b_label = np.arange(50, 54) + x = pd.DataFrame({'a': a, 'b': b}, index=index) + y = pd.DataFrame({'a_target': a_label, 'b_target': b_label}, 
index=index) + return x, y + def callInputFnOnce(self, input_fn, session): results = input_fn() coord = coordinator.Coordinator() @@ -89,6 +99,21 @@ class PandasIoTest(test.TestCase): self.assertAllEqual(features['b'], [32, 33]) self.assertAllEqual(target, [-32, -31]) + def testPandasInputFnWhenYIsDataFrame_ProducesExpectedOutput(self): + if not HAS_PANDAS: + return + with self.test_session() as session: + x, y = self.makeTestDataFrameWithYAsDataFrame() + input_fn = pandas_io.pandas_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + + features, targets = self.callInputFnOnce(input_fn, session) + + self.assertAllEqual(features['a'], [0, 1]) + self.assertAllEqual(features['b'], [32, 33]) + self.assertAllEqual(targets['a_target'], [10, 11]) + self.assertAllEqual(targets['b_target'], [50, 51]) + def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self): if not HAS_PANDAS: return -- GitLab From 45fb10adbde00a82af4576e0de01a5012b0b1ad8 Mon Sep 17 00:00:00 2001 From: nrstott Date: Fri, 18 May 2018 12:01:03 -0400 Subject: [PATCH 0024/1310] handle overlapping columns in pandas_input_fn when y is df --- .../python/estimator/inputs/pandas_io.py | 25 ++++++++++++++++--- .../python/estimator/inputs/pandas_io_test.py | 16 ++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/estimator/inputs/pandas_io.py b/tensorflow/python/estimator/inputs/pandas_io.py index abf3f33519..6918683ce7 100644 --- a/tensorflow/python/estimator/inputs/pandas_io.py +++ b/tensorflow/python/estimator/inputs/pandas_io.py @@ -35,6 +35,22 @@ except ImportError: HAS_PANDAS = False +def _get_unique_target_key(features, target_column_name): + """Returns a key that does not exist in the input DataFrame `features`. + + Args: + features: DataFrame + target_column_name: Name of the target column as a `str` + + Returns: + A unique key that can be used to insert the target into + features. 
+ """ + while target_column_name in features: + target_column_name += '_n' + return target_column_name + + @tf_export('estimator.inputs.pandas_input_fn') def pandas_input_fn(x, y=None, @@ -88,10 +104,9 @@ def pandas_input_fn(x, raise ValueError('Index for x and y are mismatched.\nIndex for x: %s\n' 'Index for y: %s\n' % (x.index, y.index)) if isinstance(y, pd.DataFrame): - target_column = list(y) - print(target_column) + y_columns = [(column, _get_unique_target_key(x, column)) for column in list(y)] + target_column = [v for _, v in y_columns] x[target_column] = y - print(x) else: x[target_column] = y @@ -124,7 +139,9 @@ def pandas_input_fn(x, features = dict(zip(list(x.columns), features)) if y is not None: if isinstance(target_column, list): - target = {column: features.pop(column) for column in target_column} + keys = [k for k, _ in y_columns] + values = [features.pop(column) for column in target_column] + target = {k: v for k, v in zip(keys, values)} else: target = features.pop(target_column) return features, target diff --git a/tensorflow/python/estimator/inputs/pandas_io_test.py b/tensorflow/python/estimator/inputs/pandas_io_test.py index f4970f07b3..f8546abb8a 100644 --- a/tensorflow/python/estimator/inputs/pandas_io_test.py +++ b/tensorflow/python/estimator/inputs/pandas_io_test.py @@ -114,6 +114,22 @@ class PandasIoTest(test.TestCase): self.assertAllEqual(targets['a_target'], [10, 11]) self.assertAllEqual(targets['b_target'], [50, 51]) + def testPandasInputFnWhenYIsDataFrame_HandlesOverlappingColumnNames(self): + if not HAS_PANDAS: + return + with self.test_session() as session: + x, y = self.makeTestDataFrameWithYAsDataFrame() + y = y.rename(columns={'a_target': 'a', 'b_target': 'b'}) + input_fn = pandas_io.pandas_input_fn( + x, y, batch_size=2, shuffle=False, num_epochs=1) + + features, targets = self.callInputFnOnce(input_fn, session) + + self.assertAllEqual(features['a'], [0, 1]) + self.assertAllEqual(features['b'], [32, 33]) + 
self.assertAllEqual(targets['a'], [10, 11]) + self.assertAllEqual(targets['b'], [50, 51]) + def testPandasInputFn_ProducesOutputsForLargeBatchAndMultipleEpochs(self): if not HAS_PANDAS: return -- GitLab From 416bac50aaa684049bb3270d379316efc5b960c2 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Fri, 25 May 2018 01:06:33 +0200 Subject: [PATCH 0025/1310] [tfgan] Add possibility to export GANEstimator saved model --- tensorflow/contrib/gan/python/estimator/python/head_impl.py | 6 +++++- tensorflow/contrib/gan/python/estimator/python/head_test.py | 5 +++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py index ff903a78cc..5b5557bd8f 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py @@ -24,6 +24,7 @@ from tensorflow.contrib.gan.python import namedtuples as tfgan_tuples from tensorflow.contrib.gan.python import train as tfgan_train from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator.canned import head +from tensorflow.python.estimator.export import export_output from tensorflow.python.framework import ops from tensorflow.python.ops import metrics as metrics_lib @@ -182,7 +183,10 @@ class GANHead(head._Head): # pylint: disable=protected-access if mode == model_fn_lib.ModeKeys.PREDICT: return model_fn_lib.EstimatorSpec( mode=model_fn_lib.ModeKeys.PREDICT, - predictions=gan_model.generated_data) + predictions=gan_model.generated_data, + export_outputs={ + 'predict': export_output.PredictOutput(gan_model.generated_data) + }) elif mode == model_fn_lib.ModeKeys.EVAL: gan_loss = self.create_loss( features=None, mode=mode, logits=gan_model, labels=None) diff --git a/tensorflow/contrib/gan/python/estimator/python/head_test.py b/tensorflow/contrib/gan/python/estimator/python/head_test.py index 
6587f1fc60..c121f322b5 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_test.py @@ -71,13 +71,14 @@ class GANHeadTest(test.TestCase): return {} def _test_modes_helper(self, mode): - self.gan_head.create_estimator_spec( + return self.gan_head.create_estimator_spec( features=None, mode=mode, logits=get_gan_model()) def test_modes_predict(self): - self._test_modes_helper(model_fn_lib.ModeKeys.PREDICT) + spec = self._test_modes_helper(model_fn_lib.ModeKeys.PREDICT) + self.assertItemsEqual(('predict',), spec.export_outputs.keys()) def test_modes_eval(self): self._test_modes_helper(model_fn_lib.ModeKeys.EVAL) -- GitLab From 06ba7827cb4e781ab36e6bbc46cf34e3ea587335 Mon Sep 17 00:00:00 2001 From: Dan Osipov Date: Sun, 27 May 2018 10:33:27 -0700 Subject: [PATCH 0026/1310] Remove unused function --- tensorflow/python/ops/image_ops_impl.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index a070a4699f..6e72ebd634 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1031,12 +1031,6 @@ def resize_image_with_pad(image, target_height, target_width, else: return max(x, y) - def equal_(x, y): - if _is_tensor(x) or _is_tensor(y): - return math_ops.equal(x, y) - else: - return x == y - _, height, width, _ = _ImageDimensions(image, rank=4) # convert values to float, to ease divisions -- GitLab From 7991324a664c4c187c6e7e76d1c7588d79530c33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Wed, 30 May 2018 17:40:14 +0800 Subject: [PATCH 0027/1310] ENH: add gradient for sparse_slice --- tensorflow/core/kernels/BUILD | 7 + .../core/kernels/sparse_slice_grad_op.cc | 120 ++++++++++++++++++ tensorflow/core/ops/sparse_ops.cc | 14 ++ 3 files changed, 141 insertions(+) create mode 100644 
tensorflow/core/kernels/sparse_slice_grad_op.cc diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 1bf6eafb58..dc1155077b 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -3886,6 +3886,7 @@ cc_library( ":sparse_reduce_op", ":sparse_reorder_op", ":sparse_reshape_op", + ":sparse_slice_grad_op", ":sparse_slice_op", ":sparse_softmax", ":sparse_sparse_binary_op_shared", @@ -3971,6 +3972,12 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "sparse_slice_grad_op", + prefix = "sparse_slice_grad_op", + deps = SPARSE_DEPS, +) + tf_kernel_library( name = "sparse_slice_op", prefix = "sparse_slice_op", diff --git a/tensorflow/core/kernels/sparse_slice_grad_op.cc b/tensorflow/core/kernels/sparse_slice_grad_op.cc new file mode 100644 index 0000000000..8d2c597c75 --- /dev/null +++ b/tensorflow/core/kernels/sparse_slice_grad_op.cc @@ -0,0 +1,120 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_util.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/util/sparse/sparse_tensor.h" + +namespace tensorflow { + +template +class SparseSliceGradOp : public OpKernel { + public: + explicit SparseSliceGradOp(OpKernelConstruction *ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext *ctx) override { + const Tensor *backprop_val_grad, *input_indices, *output_indices, *input_start; + OP_REQUIRES_OK(ctx, ctx->input("backprop_val_grad", &backprop_val_grad)); + OP_REQUIRES_OK(ctx, ctx->input("input_indices", &input_indices)); + OP_REQUIRES_OK(ctx, ctx->input("input_start", &input_start)); + OP_REQUIRES_OK(ctx, ctx->input("output_indices", &output_indices)); + + OP_REQUIRES(ctx, + TensorShapeUtils::IsMatrix(input_indices->shape()) && + TensorShapeUtils::IsMatrix(output_indices->shape()), + errors::InvalidArgument( + "Input indices should be matrices but received shapes: ", + input_indices->shape().DebugString(), " and ", + output_indices->shape().DebugString())); + OP_REQUIRES( + ctx, TensorShapeUtils::IsVector(backprop_val_grad->shape()), + errors::InvalidArgument( + "Input backprop_val_grad should be a vector but received shape: ", + backprop_val_grad->shape().DebugString())); + OP_REQUIRES( + ctx, + input_indices->dim_size(1) == output_indices->dim_size(1), + errors::InvalidArgument("The input and output should have the same " + "ndims: got: ", input_indices->dim_size(1), " and ", + output_indices->dim_size(1))); + OP_REQUIRES( + ctx, output_indices->dim_size(0) <= input_indices->dim_size(0), + errors::InvalidArgument("# rows of output_indices should be not greater " + "than of input_indices, got ", + output_indices->dim_size(0), " and ", + 
input_indices->dim_size(0))); + OP_REQUIRES( + ctx, backprop_val_grad->NumElements() == output_indices->dim_size(0), + errors::InvalidArgument("# elements of backprop_val_grad and # rows of " + "output_indices should match (#nnz of sum): got ", + backprop_val_grad->NumElements(), " and ", + output_indices->dim_size(0))); + OP_REQUIRES(ctx, TensorShapeUtils::IsVector(input_start->shape()), + errors::InvalidArgument( + "Input start should be a vector but received shape ", + input_start->shape().DebugString())); + + const int num_dims = input_indices->dim_size(1); + const int64 input_nnz = input_indices->dim_size(0); + + Tensor *val_grad; + OP_REQUIRES_OK(ctx, + ctx->allocate_output(0, TensorShape({input_nnz}), &val_grad)); + + T *val_grad_flat = val_grad->flat().data(); + const T *backprop_val_grad_flat = backprop_val_grad->flat().data(); + memset(val_grad_flat, 0, sizeof(T) * input_nnz); + + // Fill gradients for position where indices of input and ouput are same. + const auto input_indices_mat = input_indices->matrix(); + const auto output_indices_mat = output_indices->matrix(); + const auto input_start_flat = input_start->flat(); + int64 j = 0; + for (int64 i = 0; i < input_nnz && j < backprop_val_grad->NumElements(); + ++i) { + bool isSame = true; + for (int d = 0; d < num_dims; ++d) { + const int64 a = input_indices_mat(i, d); + const int64 b = output_indices_mat(j, d); + const int64 offset = input_start_flat(d); + if (a != b + offset) { + isSame = false; + break; + } + } + if (isSame) { + val_grad_flat[i] = backprop_val_grad_flat[j]; + ++j; + } + } + OP_REQUIRES( + ctx, backprop_val_grad->NumElements() == j, + errors::Internal("Elements of backprop_val_grad aren't eaten up :", + "all: ", backprop_val_grad->NumElements(), + " , used: ", output_indices->dim_size(0))); + } +}; + +#define REGISTER_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("SparseSliceGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ + SparseSliceGradOp) + 
+TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); +#undef REGISTER_KERNELS +} // namespace tensorflow diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc index acc8c782ef..bc0cb2095d 100644 --- a/tensorflow/core/ops/sparse_ops.cc +++ b/tensorflow/core/ops/sparse_ops.cc @@ -302,6 +302,20 @@ REGISTER_OP("SparseSplit") return Status::OK(); }); +REGISTER_OP("SparseSliceGrad") + .Input("backprop_val_grad: T") + .Input("input_indices: int64") + .Input("input_start: int64") + .Input("output_indices: int64") + .Output("val_grad: T") + .Attr("T: numbertype") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle indices; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &indices)); + c->set_output(0, c->Vector(c->Dim(indices, 0))); + return Status::OK(); + }); + REGISTER_OP("SparseSlice") .Input("indices: int64") .Input("values: T") -- GitLab From b0ec8d2c467173ce5a43c13631bc51fd89f072e5 Mon Sep 17 00:00:00 2001 From: Soila Kavulya Date: Wed, 30 May 2018 19:23:08 -0700 Subject: [PATCH 0028/1310] Update artifactId for TensorFlow Hadoop and spark-connector jars --- tensorflow/java/maven/README.md | 4 ++-- .../java/maven/{tensorflow-hadoop => hadoop}/pom.xml | 4 ++-- tensorflow/java/maven/pom.xml | 4 ++-- tensorflow/java/maven/run_inside_container.sh | 10 ++++++---- .../pom.xml | 4 ++-- 5 files changed, 14 insertions(+), 12 deletions(-) rename tensorflow/java/maven/{tensorflow-hadoop => hadoop}/pom.xml (94%) rename tensorflow/java/maven/{spark-tensorflow-connector => spark-connector}/pom.xml (93%) diff --git a/tensorflow/java/maven/README.md b/tensorflow/java/maven/README.md index fa756815a9..3e030dcd09 100644 --- a/tensorflow/java/maven/README.md +++ b/tensorflow/java/maven/README.md @@ -53,10 +53,10 @@ There are seven artifacts and thus `pom.xml`s involved in this release: 7. [`parentpom`](https://maven.apache.org/pom/index.html): Common settings shared by all of the above. -8. 
`tensorflow-hadoop`: The TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop. +8. `hadoop`: The TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop. The source code for this package is available in the [TensorFlow Ecosystem](https://github.com/tensorflow/ecosystem/tree/master/hadoop) -9. `spark-tensorflow-connector`: A Scala library for loading and storing TensorFlow TFRecord +9. `spark-connector`: A Scala library for loading and storing TensorFlow TFRecord using Apache Spark DataFrames. The source code for this package is available in the [TensorFlow Ecosystem](https://github.com/tensorflow/ecosystem/tree/master/spark/spark-tensorflow-connector) diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/hadoop/pom.xml similarity index 94% rename from tensorflow/java/maven/tensorflow-hadoop/pom.xml rename to tensorflow/java/maven/hadoop/pom.xml index ee90d8c92b..a872c20d3b 100644 --- a/tensorflow/java/maven/tensorflow-hadoop/pom.xml +++ b/tensorflow/java/maven/hadoop/pom.xml @@ -5,7 +5,7 @@ 4.0.0 TensorFlow TFRecord InputFormat/OutputFormat for Apache Hadoop - tensorflow-hadoop + hadoop jar @@ -21,4 +21,4 @@ 1.8.0 ../ - + \ No newline at end of file diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 7a95fb2556..19287f8245 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -32,8 +32,8 @@ libtensorflow_jni_gpu tensorflow proto - tensorflow-hadoop - spark-tensorflow-connector + hadoop + spark-connector 4.0.0 TensorFlow TFRecord connector for Apache Spark DataFrames - spark-tensorflow-connector + spark-connector jar @@ -21,4 +21,4 @@ 1.8.0 ../ - + \ No newline at end of file -- GitLab From c47442d74edf0de11cad2975662a21bd27b9bf68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 31 May 2018 10:45:28 +0800 Subject: [PATCH 0029/1310] ENH: add gradient in python side --- tensorflow/python/kernel_tests/BUILD | 1 + 
.../kernel_tests/sparse_slice_op_test.py | 22 ++++++++++++-- tensorflow/python/ops/sparse_grad.py | 30 +++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 83b353600a..82f3357515 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -892,6 +892,7 @@ tf_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", "//tensorflow/python:framework", + "//tensorflow/python:sparse_grad", "//tensorflow/python:sparse_ops", ], ) diff --git a/tensorflow/python/kernel_tests/sparse_slice_op_test.py b/tensorflow/python/kernel_tests/sparse_slice_op_test.py index da116601f8..38eed897cf 100644 --- a/tensorflow/python/kernel_tests/sparse_slice_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_slice_op_test.py @@ -21,13 +21,15 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import sparse_ops +import tensorflow.python.ops.sparse_grad # pylint: disable=unused-import from tensorflow.python.platform import test class SparseSliceOpTest(test.TestCase): - def _SparseTensor_4x6(self): + def _SparseTensor_4x6(self, val_dtype=np.int64): # [0 | |2 | |4 |5 ] # [ |11| |13|14| ] # [20| | |23| |25] @@ -37,7 +39,7 @@ class SparseSliceOpTest(test.TestCase): [2, 3], [2, 5], [3, 0], [3, 2], [3, 3], [3, 5]]).astype( np.int64) val = np.array([0, 2, 4, 5, 11, 13, 14, 20, 23, 25, 30, 32, 33, 35]).astype( - np.int64) + val_dtype) shape = np.array([4, 6]).astype(np.int64) return sparse_tensor.SparseTensor(ind, val, shape) @@ -244,6 +246,22 @@ class SparseSliceOpTest(test.TestCase): self.assertAllEqual(sparse_tensor5.values.eval(), [5, 25, 35]) self.assertAllEqual(sparse_tensor5.dense_shape.eval(), [4, 1]) + def testGradients(self): + sp_input = self._SparseTensor_4x6(val_dtype=np.float32) + 
start_and_size = [([0, 0], [4, 2]), + ([0, 2], [5, 2]), + ([0, 4], [5, 3])] + + with self.test_session(use_gpu=False): + for start, size in start_and_size: + sp_output = sparse_ops.sparse_slice(sp_input, start, size) + nnz_in = len(sp_input.values.eval()) + nnz_out = len(sp_output.values.eval()) + + err = gradient_checker.compute_gradient_error( + [sp_input.values], [(nnz_in,)], sp_output.values, (nnz_out,)) + self.assertLess(err, 1e-3) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/sparse_grad.py b/tensorflow/python/ops/sparse_grad.py index 97353d6c74..3ed94738e0 100644 --- a/tensorflow/python/ops/sparse_grad.py +++ b/tensorflow/python/ops/sparse_grad.py @@ -116,6 +116,36 @@ def _SparseReduceSumGrad(op, out_grad): None, None) +@ops.RegisterGradient("SparseSlice") +def _SparseSliceGrad(op, *grads): + """The backward operator for the SparseSlice op. + + The SparseAdd op calculates A + B, where A, B, and the sum are all represented + as `SparseTensor` objects. This op takes in the upstream gradient w.r.t. + non-empty values of the sum, and outputs the gradients w.r.t. the non-empty + values of A and B. + + Args: + op: the SparseAdd op + *grads: the incoming gradients, one element per output of `op` + + Returns: + Gradient for each of the 6 input tensors of SparseAdd: + (a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh) + The gradients for the indices, shapes, and the threshold are None. + """ + backprop_val_grad = grads[1] + input_indices = op.inputs[0] + input_start = op.inputs[3] + output_indices = op.outputs[0] + + val_grad = gen_sparse_ops.sparse_slice_grad( + backprop_val_grad, input_indices, input_start, output_indices) + val_grad.set_shape(op.inputs[1].get_shape()) + # (indices, values, shape, start, size) + return (None, val_grad, None, None, None) + + @ops.RegisterGradient("SparseTensorDenseMatMul") def _SparseTensorDenseMatMulGrad(op, grad): """Gradients for the dense tensor in the SparseTensorDenseMatMul op. 
-- GitLab From 86191c9f267fbd157c199c410e8d46574d034782 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 31 May 2018 12:27:39 +0800 Subject: [PATCH 0030/1310] DOC: add document --- .../base_api/api_def_SparseSliceGrad.pbtxt | 40 +++++++++++++++++++ .../python_api/api_def_SparseSliceGrad.pbtxt | 4 ++ tensorflow/python/ops/sparse_grad.py | 15 ++++--- 3 files changed, 51 insertions(+), 8 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSliceGrad.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt new file mode 100644 index 0000000000..51af6adcf1 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt @@ -0,0 +1,40 @@ +op { + graph_op_name: "SparseSliceGrad" + in_arg { + name: "backprop_val_grad" + description: <