From 970972d242e26d0cdf635d3af646df1a519dc677 Mon Sep 17 00:00:00 2001 From: ThisIsPIRI <34787507+ThisIsPIRI@users.noreply.github.com> Date: Mon, 6 Aug 2018 14:41:37 +0900 Subject: [PATCH 0001/1095] Clarify what happens when a new value is input to some methods --- tensorflow/contrib/training/python/training/hparam.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py index 3beb7bfe30..9f5059b4b1 100644 --- a/tensorflow/contrib/training/python/training/hparam.py +++ b/tensorflow/contrib/training/python/training/hparam.py @@ -494,6 +494,7 @@ class HParams(object): value: New value of the hyperparameter. Raises: + KeyError: If the hyperparameter doesn't exist. ValueError: If there is a type mismatch. """ param_type, is_list = self._hparam_types[name] @@ -510,7 +511,7 @@ class HParams(object): setattr(self, name, _cast_to_type_if_compatible(name, param_type, value)) def del_hparam(self, name): - """Removes the hyperparameter with key 'name'. + """Removes the hyperparameter with key 'name'. Does nothing if it isn't present. Args: name: Name of the hyperparameter. @@ -532,7 +533,7 @@ class HParams(object): The `HParams` instance. Raises: - ValueError: If `values` cannot be parsed. + ValueError: If `values` cannot be parsed or a hyperparameter in `values` doesn't exist. """ type_map = dict() for name, t in self._hparam_types.items(): @@ -543,7 +544,7 @@ class HParams(object): return self.override_from_dict(values_map) def override_from_dict(self, values_dict): - """Override hyperparameter values, parsing new values from a dictionary. + """Override existing hyperparameter values, parsing new values from a dictionary. Args: values_dict: Dictionary of name:value pairs. @@ -552,6 +553,7 @@ class HParams(object): The `HParams` instance. Raises: + KeyError: If a hyperparameter in `values_dict` doesn't exist. 
ValueError: If `values_dict` cannot be parsed. """ for name, value in values_dict.items(): @@ -591,7 +593,7 @@ class HParams(object): sort_keys=sort_keys) def parse_json(self, values_json): - """Override hyperparameter values, parsing new values from a json object. + """Override existing hyperparameter values, parsing new values from a json object. Args: values_json: String containing a json object of name:value pairs. @@ -600,6 +602,7 @@ class HParams(object): The `HParams` instance. Raises: + KeyError: If a hyperparameter in `values_json` doesn't exist. ValueError: If `values_json` cannot be parsed. """ values_map = json.loads(values_json) -- GitLab From 26368188c018cdcc1bbb80ce8205fb04305816c2 Mon Sep 17 00:00:00 2001 From: ThisIsPIRI <34787507+ThisIsPIRI@users.noreply.github.com> Date: Mon, 6 Aug 2018 14:56:38 +0900 Subject: [PATCH 0002/1095] Add 'existing' to parse's docstring --- tensorflow/contrib/training/python/training/hparam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/training/python/training/hparam.py b/tensorflow/contrib/training/python/training/hparam.py index 9f5059b4b1..372630df81 100644 --- a/tensorflow/contrib/training/python/training/hparam.py +++ b/tensorflow/contrib/training/python/training/hparam.py @@ -521,7 +521,7 @@ class HParams(object): del self._hparam_types[name] def parse(self, values): - """Override hyperparameter values, parsing new values from a string. + """Override existing hyperparameter values, parsing new values from a string. See parse_values for more detail on the allowed format for values. 
-- GitLab From bd6c11f878e1820417d1ceff1b02222178f60c16 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Fri, 12 Oct 2018 10:10:21 -0700 Subject: [PATCH 0003/1095] Clean out MKL_ML code from batchnorm ops --- .../core/kernels/mkl_fused_batch_norm_op.cc | 658 +----------------- 1 file changed, 2 insertions(+), 656 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 2ec6c8fa89..4b8c066902 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -20,671 +20,19 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/util/tensor_format.h" - -#ifndef INTEL_MKL_ML_ONLY +#include "tensorflow/core/util/mkl_util.h" #include "mkldnn.hpp" + using mkldnn::batch_normalization_backward; using mkldnn::batch_normalization_forward; using mkldnn::prop_kind; using mkldnn::stream; using mkldnn::use_global_stats; using mkldnn::use_scale_shift; -#else -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" -#endif - -#include "tensorflow/core/util/mkl_util.h" -// TODO(inteltf) Address comments from PR 8968. 
namespace tensorflow { using CPUDevice = Eigen::ThreadPoolDevice; -#ifdef INTEL_MKL_ML_ONLY - -template -class MklFusedBatchNormOp : public OpKernel { - public: - explicit MklFusedBatchNormOp(OpKernelConstruction* context) - : OpKernel(context) { - float epsilon; - OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon)); - epsilon_ = T(epsilon); - string tensor_format; - OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format)); - OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_), - errors::InvalidArgument("Invalid data format")); - OP_REQUIRES_OK(context, context->GetAttr("is_training", &is_training_)); - } - - void Compute(OpKernelContext* context) override { - MklFusedBatchNormOpContext mkl_context; - const Tensor& input = MklGetInput(context, 0); - const Tensor& scale = MklGetInput(context, 1); - const Tensor& shift = MklGetInput(context, 2); - const Tensor& est_mean = MklGetInput(context, 3); - const Tensor& est_variance = MklGetInput(context, 4); - - GetMklShape(context, 0, &(mkl_context.mkl_shape_input_shape)); - bool input_in_mkl_format = mkl_context.mkl_shape_input_shape.IsMklTensor(); - - if (!input_in_mkl_format) { - OP_REQUIRES(context, input.dims() == 4, - errors::InvalidArgument("input must be 4-dimensional", - input.shape().DebugString())); - } - OP_REQUIRES(context, scale.dims() == 1, - errors::InvalidArgument("scale must be 1-dimensional", - scale.shape().DebugString())); - OP_REQUIRES(context, shift.dims() == 1, - errors::InvalidArgument("offset must be 1-dimensional", - shift.shape().DebugString())); - OP_REQUIRES(context, est_mean.dims() == 1, - errors::InvalidArgument("estimated_mean must be 1-dimensional", - est_mean.shape().DebugString())); - - OP_REQUIRES( - context, est_variance.dims() == 1, - errors::InvalidArgument("estimated_variance must be 1-dimensional", - est_variance.shape().DebugString())); - - if (is_training_) { - OP_REQUIRES(context, est_mean.dim_size(0) == 0, - 
errors::InvalidArgument("estimated_mean empty for training", - est_mean.shape().DebugString())); - OP_REQUIRES(context, est_variance.dim_size(0) == 0, - errors::InvalidArgument( - "estimated_variance must be empty for training", - est_variance.shape().DebugString())); - } - - unsigned int flag_batch_norm = - is_training_ ? dnnUseScaleShift - : (dnnUseInputMeanVariance | dnnUseScaleShift); - - mkl_context.MklExtractParams(context, tensor_format_); - - // Create layout only for input data as it is used in Op primitive. - mkl_context.MklCreateInputLayout(context); - - // Create Op primitive. - CHECK_EQ(dnnBatchNormalizationCreateForward_v2_F32( - &(mkl_context.mkl_prim_batchnorm), nullptr, - mkl_context.mkl_lt_input, static_cast(epsilon_), - flag_batch_norm), - E_SUCCESS); - - // Temporary tensors with buffers for the context inputs, if - // conversion to MKL-Op specific layouts are required. It is assumed here - // that TF's 1D tensors (scale, shift, est_mean, and est_variance) won't - // require any conversion. - // Since scale-shift is combined in MKL, a buffer is required. 
- Tensor mkl_tmp_input_buf_tensor, mkl_tmp_scale_shift_buf_tensor; - mkl_context.MklPrepareContextInputs(context, &mkl_tmp_input_buf_tensor, - &mkl_tmp_scale_shift_buf_tensor); - - // Output data in MKL layout - Tensor* output = nullptr; - TensorShape tf_shape_output; - MklShape mkl_shape_output; - mkl_shape_output.SetMklTensor(true); - mkl_shape_output.SetMklLayout(mkl_context.mkl_prim_batchnorm, - dnnResourceDst); - mkl_shape_output.SetTfLayout(mkl_context.mkl_params.in_dim, - mkl_context.mkl_params.in_sizes, - mkl_context.mkl_params.in_strides); - mkl_shape_output.SetTfDimOrder(mkl_context.mkl_params.in_dim, - tensor_format_); - tf_shape_output.AddDim(dnnLayoutGetMemorySize_F32(static_cast( - mkl_shape_output.GetMklLayout())) / - sizeof(T)); - AllocateOutputSetMklShape(context, 0, &output, tf_shape_output, - mkl_shape_output); - mkl_context.mkl_res_batchnorm[dnnResourceDst] = - static_cast(output->flat().data()); - - // Batch mean in TF layout - Tensor* batch_mean = nullptr; - MklShape mkl_shape_batch_mean; - mkl_shape_batch_mean.SetMklTensor(false); - AllocateOutputSetMklShape(context, 1, &batch_mean, scale.shape(), - mkl_shape_batch_mean); - // Batch variance in TF layout - Tensor* batch_variance = nullptr; - MklShape mkl_shape_batch_variance; - mkl_shape_batch_variance.SetMklTensor(false); - AllocateOutputSetMklShape(context, 2, &batch_variance, scale.shape(), - mkl_shape_batch_variance); - // If training mode, set dnnResourceMean and dnnResourceVariance to - // output tensors for batch mean and variance. - // Otherwise, set dnnResourceMean and dnnResourceVariance to - // estimated mean and variance. 
- if (is_training_) - mkl_context.MklSetMeanVariance(*batch_mean, *batch_variance); - else - mkl_context.MklSetMeanVariance(est_mean, est_variance); - - // Now that all resources are set, it is ready for dnnExecute - CHECK_EQ(dnnExecute_F32(mkl_context.mkl_prim_batchnorm, - mkl_context.mkl_res_batchnorm), - E_SUCCESS); - - // Mean and variance (without Bessel's correction) saved for backward - // computation to serve as pre-computed mean and variance. - Tensor* saved_mean = nullptr; - MklShape mkl_shape_saved_mean; - mkl_shape_saved_mean.SetMklTensor(false); - AllocateOutputSetMklShape(context, 3, &saved_mean, scale.shape(), - mkl_shape_saved_mean); - std::memcpy( - reinterpret_cast(saved_mean->flat().data()), - reinterpret_cast(mkl_context.mkl_res_batchnorm[dnnResourceMean]), - scale.NumElements() * sizeof(float)); - Tensor* saved_variance = nullptr; - MklShape mkl_shape_saved_variance; - mkl_shape_saved_variance.SetMklTensor(false); - AllocateOutputSetMklShape(context, 4, &saved_variance, scale.shape(), - mkl_shape_saved_variance); - std::memcpy(reinterpret_cast(saved_variance->flat().data()), - reinterpret_cast( - mkl_context.mkl_res_batchnorm[dnnResourceVariance]), - scale.NumElements() * sizeof(float)); - - // Bessel's correction on variance, if training mode is on - if (is_training_) { - float* p_var = static_cast(batch_variance->flat().data()); - auto depth = mkl_context.mkl_params.depth; - size_t orig_size = mkl_context.mkl_params.in_sizes[0] * - mkl_context.mkl_params.in_sizes[1] * - mkl_context.mkl_params.in_sizes[3]; - size_t adjust_size = orig_size - 1; - float adjust_factor = (static_cast(orig_size)) / adjust_size; - for (int i = 0; i < depth; i++) p_var[i] = adjust_factor * p_var[i]; - } - - mkl_context.MklCleanup(); - } - - private: - T epsilon_; - TensorFormat tensor_format_; - bool is_training_; - - // Structure containing all info for MklOp - typedef struct { - // Parameters used for input and output layouts - struct MklBatchNormParams { - // 
BatchNormOp src and - size_t in_dim; - size_t in_sizes[4]; - size_t in_strides[4]; - size_t depth; // Batch normalization is done for per channel. - } mkl_params; - - MklShape mkl_shape_input_shape; - - // MKL primitive and resources for BatchNormOp - dnnPrimitive_t mkl_prim_batchnorm = nullptr; - void* mkl_res_batchnorm[dnnResourceNumber]; - - // MKL layouts for inputs in the context - dnnLayout_t mkl_lt_input = nullptr; - - void MklCleanup() { - bool input_in_mkl_format = mkl_shape_input_shape.IsMklTensor(); - if (!input_in_mkl_format) dnnLayoutDelete_F32(mkl_lt_input); - if (mkl_prim_batchnorm != nullptr) dnnDelete_F32(mkl_prim_batchnorm); - } - - void MklExtractParams(OpKernelContext* context, - const TensorFormat& tensor_format) { - const Tensor& input = MklGetInput(context, 0); - bool input_in_mkl_format = mkl_shape_input_shape.IsMklTensor(); - mkl_params.in_dim = input_in_mkl_format - ? mkl_shape_input_shape.GetDimension() - : input.dims(); - mkl_params.in_sizes[0] = static_cast( - input_in_mkl_format ? mkl_shape_input_shape.GetSizes()[0] - : GetTensorDim(input, tensor_format, 'W')); - mkl_params.in_sizes[1] = static_cast( - input_in_mkl_format ? mkl_shape_input_shape.GetSizes()[1] - : GetTensorDim(input, tensor_format, 'H')); - mkl_params.in_sizes[2] = static_cast( - input_in_mkl_format ? mkl_shape_input_shape.GetSizes()[2] - : GetTensorDim(input, tensor_format, 'C')); - mkl_params.in_sizes[3] = static_cast( - input_in_mkl_format ? 
mkl_shape_input_shape.GetSizes()[3] - : GetTensorDim(input, tensor_format, 'N')); - mkl_params.depth = mkl_params.in_sizes[2]; - GetStridesFromSizes(tensor_format, mkl_params.in_strides, - mkl_params.in_sizes); - } - - void MklCreateInputLayout(OpKernelContext* context) { - const Tensor& input = MklGetInput(context, 0); - bool input_in_mkl_format = mkl_shape_input_shape.IsMklTensor(); - if (input_in_mkl_format) { - mkl_lt_input = - static_cast(mkl_shape_input_shape.GetCurLayout()); - } else { - CHECK_EQ( - dnnLayoutCreate_F32(&mkl_lt_input, mkl_params.in_dim, - mkl_params.in_sizes, mkl_params.in_strides), - E_SUCCESS); - } - } - void MklPrepareContextInputs(OpKernelContext* context, - Tensor* mkl_tmp_input_buf_tensor, - Tensor* mkl_tmp_scale_shift_buf_tensor) { - bool mkl_convert_input; - dnnPrimitive_t mkl_prim_convert_input = nullptr; - dnnLayout_t mkl_lt_internal_input = nullptr; - void* mkl_buf_converted_input = nullptr; - // Compare with internal layouts and convert if needed - const Tensor& input = MklGetInput(context, 0); - void* mkl_buf_input = - const_cast(static_cast(input.flat().data())); - CHECK_EQ(dnnLayoutCreateFromPrimitive_F32( - &mkl_lt_internal_input, mkl_prim_batchnorm, dnnResourceSrc), - E_SUCCESS); - mkl_convert_input = - !dnnLayoutCompare_F32(mkl_lt_internal_input, mkl_lt_input); - if (mkl_convert_input) { - CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_input, mkl_lt_input, - mkl_lt_internal_input), - E_SUCCESS); - AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, mkl_lt_internal_input, - &mkl_buf_converted_input); - CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_input, mkl_buf_input, - mkl_buf_converted_input), - E_SUCCESS); - dnnDelete_F32(mkl_prim_convert_input); - } - dnnLayoutDelete_F32(mkl_lt_internal_input); - mkl_res_batchnorm[dnnResourceSrc] = - (mkl_convert_input) ? mkl_buf_converted_input : mkl_buf_input; - - // scale-shift layout is created from primitive. 
So no conversion - // is needed, however, a buffer has to be allocated. - dnnLayout_t mkl_lt_scale_shift = nullptr; - void* mkl_buf_scale_shift = nullptr; - CHECK_EQ( - dnnLayoutCreateFromPrimitive_F32( - &mkl_lt_scale_shift, mkl_prim_batchnorm, dnnResourceScaleShift), - E_SUCCESS); - AllocTmpBuffer(context, mkl_tmp_scale_shift_buf_tensor, - mkl_lt_scale_shift, &mkl_buf_scale_shift); - // Fill the scale-shift buffer with data, presumably buffer is 2D array - const Tensor& scale = MklGetInput(context, 1); - const Tensor& shift = MklGetInput(context, 2); - float* buf_scale_shift = static_cast(mkl_buf_scale_shift); - float* buf_scale = const_cast( - static_cast(scale.flat().data())); - float* buf_shift = const_cast( - static_cast(shift.flat().data())); - auto depth = mkl_params.depth; - for (int i = 0; i < depth; i++) { - buf_scale_shift[i] = buf_scale[i]; - buf_scale_shift[i + depth] = buf_shift[i]; - } - mkl_res_batchnorm[dnnResourceScaleShift] = mkl_buf_scale_shift; - } - - inline void MklSetMeanVariance(const Tensor& mean, const Tensor& variance) { - mkl_res_batchnorm[dnnResourceMean] = const_cast( - static_cast(mean.flat().data())); - mkl_res_batchnorm[dnnResourceVariance] = const_cast( - static_cast(variance.flat().data())); - } - } MklFusedBatchNormOpContext; -}; - -template -class MklFusedBatchNormGradOp : public OpKernel { - public: - explicit MklFusedBatchNormGradOp(OpKernelConstruction* context) - : OpKernel(context) { - float epsilon; - OP_REQUIRES_OK(context, context->GetAttr("epsilon", &epsilon)); - epsilon_ = T(epsilon); - string tensor_format; - OP_REQUIRES_OK(context, context->GetAttr("data_format", &tensor_format)); - OP_REQUIRES(context, FormatFromString(tensor_format, &tensor_format_), - errors::InvalidArgument("Invalid data format")); - } - - void Compute(OpKernelContext* context) override { - MklFusedBatchNormGradOpContext mkl_context; - - const Tensor& out_backprop = MklGetInput(context, 0); - const Tensor& input = MklGetInput(context, 1); - 
const Tensor& scale = MklGetInput(context, 2); - const Tensor& saved_mean = MklGetInput(context, 3); - const Tensor& saved_var = MklGetInput(context, 4); - - // Here scale, mean, and variance are 1D and considered - // those having same layout in MKL and TF - GetMklShape(context, 0, &(mkl_context.mkl_shape_out_backprop)); - GetMklShape(context, 1, &(mkl_context.mkl_shape_input_shape)); - - bool input_in_mkl_format = mkl_context.mkl_shape_input_shape.IsMklTensor(); - bool out_backprop_in_mkl_format = - mkl_context.mkl_shape_out_backprop.IsMklTensor(); - if (!out_backprop_in_mkl_format) { - OP_REQUIRES(context, out_backprop.dims() == 4, - errors::InvalidArgument("input must be 4-dimensional", - out_backprop.shape().DebugString())); - } - if (!input_in_mkl_format) { - OP_REQUIRES(context, input.dims() == 4, - errors::InvalidArgument("input must be 4-dimensional", - input.shape().DebugString())); - } - OP_REQUIRES(context, scale.dims() == 1, - errors::InvalidArgument("scale must be 1-dimensional", - scale.shape().DebugString())); - OP_REQUIRES(context, saved_mean.dims() == 1, - errors::InvalidArgument("saved mean must be 1-dimensional", - saved_mean.shape().DebugString())); - OP_REQUIRES(context, saved_var.dims() == 1, - errors::InvalidArgument("saved variance must be 1-dimensional", - saved_var.shape().DebugString())); - - mkl_context.MklExtractParams(context, tensor_format_); - - mkl_context.MklCreateInputLayout(context); - - unsigned int flag_batch_norm_grad = dnnUseScaleShift; - - // Create Backward Op primitive. 
- CHECK_EQ(dnnBatchNormalizationCreateBackward_v2_F32( - &(mkl_context.mkl_prim_batchnorm_bwd), nullptr, - mkl_context.mkl_lt_input, static_cast(epsilon_), - flag_batch_norm_grad), - E_SUCCESS); - - // Temporary tensors and their buffers if conversion is required - Tensor mkl_tmp_input_buf_tensor, mkl_tmp_outbackprop_buf_tensor, - mkl_tmp_scaleshift_buf_tensor; - mkl_context.MklPrepareContextInputs(context, &mkl_tmp_input_buf_tensor, - &mkl_tmp_outbackprop_buf_tensor, - &mkl_tmp_scaleshift_buf_tensor); - - // Allocate tensor for grad w.r.t. input(x) - Tensor* in_backprop = nullptr; - TensorShape tf_shape_in_backprop; - MklShape mkl_shape_in_backprop; - mkl_shape_in_backprop.SetMklTensor(true); - mkl_shape_in_backprop.SetMklLayout(mkl_context.mkl_prim_batchnorm_bwd, - dnnResourceDiffSrc); - mkl_shape_in_backprop.SetTfLayout(mkl_context.mkl_params.in_dims, - mkl_context.mkl_params.in_sizes, - mkl_context.mkl_params.in_strides); - mkl_shape_in_backprop.SetTfDimOrder(mkl_context.mkl_params.in_dims, - tensor_format_); - tf_shape_in_backprop.AddDim( - dnnLayoutGetMemorySize_F32( - static_cast(mkl_shape_in_backprop.GetMklLayout())) / - sizeof(T)); - AllocateOutputSetMklShape(context, 0, &in_backprop, tf_shape_in_backprop, - mkl_shape_in_backprop); - mkl_context.mkl_res_batchnorm_bwd[dnnResourceDiffSrc] = - static_cast(in_backprop->flat().data()); - - // grad_scale and grad_shift are combined together in MKL - // So create a single temporary buffer for those. 
- // Also set dnnResourceDiffScaleShift to the temporary buffer - Tensor mkl_tmp_grad_scale_shift_buf_tensor; - mkl_context.MklPrepareGradScaleShift(context, - &mkl_tmp_grad_scale_shift_buf_tensor); - - // All dnn resources are set now, ready to execute - CHECK_EQ(dnnExecute_F32(mkl_context.mkl_prim_batchnorm_bwd, - mkl_context.mkl_res_batchnorm_bwd), - E_SUCCESS); - - // Now separate out scale and shift grad and copy to individual tensors - const TensorShape& tf_shape_scale_shift = scale.shape(); - // Allocate tensor for grad w.r.t. scale (beta) - Tensor* scale_backprop = nullptr; - MklShape mkl_shape_scale_backprop; - AllocateOutputSetMklShape(context, 1, &scale_backprop, tf_shape_scale_shift, - mkl_shape_scale_backprop); - - // Allocate tensor for grad w.r.t. shift(gamma) - Tensor* shift_backprop = nullptr; - MklShape mkl_shape_shift_backprop; - AllocateOutputSetMklShape(context, 2, &shift_backprop, tf_shape_scale_shift, - mkl_shape_shift_backprop); - - // copy scale and shift grads to tensors - float* mkl_buf_scale_shift = const_cast(static_cast( - mkl_tmp_grad_scale_shift_buf_tensor.flat().data())); - float* tf_buf_scale = const_cast( - static_cast(scale_backprop->flat().data())); - float* tf_buf_shift = const_cast( - static_cast(shift_backprop->flat().data())); - auto depth = mkl_context.mkl_params.depth; - for (int i = 0; i < depth; i++) { - tf_buf_scale[i] = mkl_buf_scale_shift[i]; - tf_buf_shift[i] = mkl_buf_scale_shift[i + depth]; - } - - // Two placeholders for estimated_mean and estimated_variance, which are - // used for inference and thus not needed here for gradient computation. 
- Tensor* placeholder_1 = nullptr; - MklShape mkl_shape_placeholder_1; - AllocateOutputSetMklShape(context, 3, &placeholder_1, TensorShape({}), - mkl_shape_placeholder_1); - Tensor* placeholder_2 = nullptr; - MklShape mkl_shape_placeholder_2; - AllocateOutputSetMklShape(context, 4, &placeholder_2, TensorShape({}), - mkl_shape_placeholder_2); - - mkl_context.MklCleanup(); - } - - private: - T epsilon_; - TensorFormat tensor_format_; - - // Structure containing all info for MklOp - typedef struct { - // Parameters used for input and output layouts - struct MklBatchNormParams { - // BatchNormOp src and - size_t in_dims; - size_t in_sizes[4]; - size_t in_strides[4]; - size_t depth; // Batch normalization is done for per channel. - } mkl_params; - - MklShape mkl_shape_out_backprop; - MklShape mkl_shape_input_shape; - - // MKL primitive and resources for BatchNormOp - dnnPrimitive_t mkl_prim_batchnorm_bwd = nullptr; - void* mkl_res_batchnorm_bwd[dnnResourceNumber]; - - // MKL layouts for inputs in the context - dnnLayout_t mkl_lt_out_backprop = nullptr; - dnnLayout_t mkl_lt_input = nullptr; - - void MklCleanup() { - bool input_in_mkl_format = mkl_shape_input_shape.IsMklTensor(); - bool out_backprop_in_mkl_format = mkl_shape_out_backprop.IsMklTensor(); - if (!input_in_mkl_format) dnnLayoutDelete_F32(mkl_lt_input); - if (!out_backprop_in_mkl_format) dnnLayoutDelete_F32(mkl_lt_out_backprop); - - dnnDelete_F32(mkl_prim_batchnorm_bwd); - } - - void MklExtractParams(OpKernelContext* context, - const TensorFormat& tensor_format) { - const Tensor& input = MklGetInput(context, 1); - bool input_in_mkl_format = mkl_shape_input_shape.IsMklTensor(); - mkl_params.in_dims = input_in_mkl_format - ? mkl_shape_input_shape.GetDimension() - : input.dims(); - mkl_params.in_sizes[0] = static_cast( - input_in_mkl_format ? mkl_shape_input_shape.GetSizes()[0] - : GetTensorDim(input, tensor_format, 'W')); - mkl_params.in_sizes[1] = static_cast( - input_in_mkl_format ? 
mkl_shape_input_shape.GetSizes()[1] - : GetTensorDim(input, tensor_format, 'H')); - mkl_params.in_sizes[2] = static_cast( - input_in_mkl_format ? mkl_shape_input_shape.GetSizes()[2] - : GetTensorDim(input, tensor_format, 'C')); - mkl_params.in_sizes[3] = static_cast( - input_in_mkl_format ? mkl_shape_input_shape.GetSizes()[3] - : GetTensorDim(input, tensor_format, 'N')); - mkl_params.depth = mkl_params.in_sizes[2]; - GetStridesFromSizes(tensor_format, mkl_params.in_strides, - mkl_params.in_sizes); - } - - void MklCreateInputLayout(OpKernelContext* context) { - const Tensor& input = MklGetInput(context, 0); - bool input_in_mkl_format = mkl_shape_input_shape.IsMklTensor(); - if (input_in_mkl_format) { - mkl_lt_input = - static_cast(mkl_shape_input_shape.GetCurLayout()); - } else { - CHECK_EQ( - dnnLayoutCreate_F32(&mkl_lt_input, mkl_params.in_dims, - mkl_params.in_sizes, mkl_params.in_strides), - E_SUCCESS); - } - - bool out_backprop_in_mkl_format = mkl_shape_out_backprop.IsMklTensor(); - if (out_backprop_in_mkl_format) { - mkl_lt_out_backprop = - static_cast(mkl_shape_out_backprop.GetCurLayout()); - } else { - CHECK_EQ( - dnnLayoutCreate_F32(&mkl_lt_out_backprop, mkl_params.in_dims, - mkl_params.in_sizes, mkl_params.in_strides), - E_SUCCESS); - } - } - - void MklPrepareContextInputs(OpKernelContext* context, - Tensor* mkl_tmp_input_buf_tensor, - Tensor* mkl_tmp_outbackprop_buf_tensor, - Tensor* mkl_tmp_scaleshift_buf_tensor) { - bool mkl_convert_input; - dnnPrimitive_t mkl_prim_convert_input = nullptr; - dnnLayout_t mkl_lt_internal_input = nullptr; - void* mkl_buf_converted_input = nullptr; - // Compare with internal layouts and convert if needed - const Tensor& input = MklGetInput(context, 1); - void* mkl_buf_input = - const_cast(static_cast(input.flat().data())); - CHECK_EQ( - dnnLayoutCreateFromPrimitive_F32( - &mkl_lt_internal_input, mkl_prim_batchnorm_bwd, dnnResourceSrc), - E_SUCCESS); - mkl_convert_input = - !dnnLayoutCompare_F32(mkl_lt_internal_input, 
mkl_lt_input); - if (mkl_convert_input) { - CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_input, mkl_lt_input, - mkl_lt_internal_input), - E_SUCCESS); - AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, mkl_lt_internal_input, - &mkl_buf_converted_input); - CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_input, mkl_buf_input, - mkl_buf_converted_input), - E_SUCCESS); - dnnDelete_F32(mkl_prim_convert_input); - } - dnnLayoutDelete_F32(mkl_lt_internal_input); - mkl_res_batchnorm_bwd[dnnResourceSrc] = - (mkl_convert_input) ? mkl_buf_converted_input : mkl_buf_input; - - bool mkl_convert_out_backprop; - dnnPrimitive_t mkl_prim_convert_out_backprop = nullptr; - dnnLayout_t mkl_lt_internal_out_backprop = nullptr; - void* mkl_buf_converted_out_backprop = nullptr; - // Compare with internal layouts and convert if needed - const Tensor& out_backprop = MklGetInput(context, 0); - void* mkl_buf_out_backprop = const_cast( - static_cast(out_backprop.flat().data())); - CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_internal_out_backprop, - mkl_prim_batchnorm_bwd, - dnnResourceDiffDst), - E_SUCCESS); - mkl_convert_out_backprop = !dnnLayoutCompare_F32( - mkl_lt_internal_out_backprop, mkl_lt_out_backprop); - if (mkl_convert_out_backprop) { - CHECK_EQ(dnnConversionCreate_F32(&mkl_prim_convert_out_backprop, - mkl_lt_out_backprop, - mkl_lt_internal_out_backprop), - E_SUCCESS); - AllocTmpBuffer(context, mkl_tmp_outbackprop_buf_tensor, - mkl_lt_internal_out_backprop, - &mkl_buf_converted_out_backprop); - CHECK_EQ(dnnConversionExecute_F32(mkl_prim_convert_out_backprop, - mkl_buf_out_backprop, - mkl_buf_converted_out_backprop), - E_SUCCESS); - dnnDelete_F32(mkl_prim_convert_out_backprop); - } - dnnLayoutDelete_F32(mkl_lt_internal_out_backprop); - mkl_res_batchnorm_bwd[dnnResourceDiffDst] = - (mkl_convert_out_backprop) ? 
mkl_buf_converted_out_backprop - : mkl_buf_out_backprop; - - // Set dnnResourceMean and dnnResourceVariance - const Tensor& saved_mean = MklGetInput(context, 3); - const Tensor& saved_var = MklGetInput(context, 4); - void* mkl_buf_saved_mean = const_cast( - static_cast(saved_mean.flat().data())); - void* mkl_buf_saved_var = const_cast( - static_cast(saved_var.flat().data())); - mkl_res_batchnorm_bwd[dnnResourceMean] = mkl_buf_saved_mean; - mkl_res_batchnorm_bwd[dnnResourceVariance] = mkl_buf_saved_var; - - // Set dnnResourceScaleShift - // Note backward Op needs only current values of scale parameters, - // shift parameters could be garbage and won't be used - const Tensor& scale = MklGetInput(context, 2); - dnnLayout_t mkl_lt_scale_shift = nullptr; - void* mkl_buf_scale_shift = nullptr; - CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_scale_shift, - mkl_prim_batchnorm_bwd, - dnnResourceScaleShift), - E_SUCCESS); - AllocTmpBuffer(context, mkl_tmp_scaleshift_buf_tensor, mkl_lt_scale_shift, - &mkl_buf_scale_shift); - float* pscale = - const_cast(static_cast(scale.flat().data())); - float* pscale_shift = static_cast(mkl_buf_scale_shift); - auto depth = mkl_params.depth; - for (int i = 0; i < depth; i++) pscale_shift[i] = pscale[i]; - mkl_res_batchnorm_bwd[dnnResourceScaleShift] = mkl_buf_scale_shift; - dnnLayoutDelete_F32(mkl_lt_scale_shift); - } - - void MklPrepareGradScaleShift(OpKernelContext* context, - Tensor* mkl_tmp_grad_scale_shift_buf_tensor) { - dnnLayout_t mkl_lt_grad_scaleshift = nullptr; - void* mkl_buf_grad_scaleshift = nullptr; - CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(&mkl_lt_grad_scaleshift, - mkl_prim_batchnorm_bwd, - dnnResourceDiffScaleShift), - E_SUCCESS); - AllocTmpBuffer(context, mkl_tmp_grad_scale_shift_buf_tensor, - mkl_lt_grad_scaleshift, &mkl_buf_grad_scaleshift); - mkl_res_batchnorm_bwd[dnnResourceDiffScaleShift] = - mkl_buf_grad_scaleshift; - dnnLayoutDelete_F32(mkl_lt_grad_scaleshift); - } - } MklFusedBatchNormGradOpContext; -}; 
-#endif - -#ifndef INTEL_MKL_ML_ONLY - struct MklBatchNormFwdParams { memory::dims src_dims; int depth; @@ -1765,8 +1113,6 @@ class MklFusedBatchNormGradOp : public OpKernel { memory::dims GetMeanVarianceDims() { return memory::dims({1, depth_}); } }; -#endif - #define REGISTER_MKL_CPU(T) \ REGISTER_KERNEL_BUILDER(Name("_MklFusedBatchNorm") \ .Device(DEVICE_CPU) \ -- GitLab From cb9dccae23f34edb15cdbe58ad6fceab702138fb Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Fri, 12 Oct 2018 10:41:32 -0700 Subject: [PATCH 0004/1095] adjust headers inclusion order per review suggestion of another PR --- tensorflow/core/kernels/mkl_fused_batch_norm_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 4b8c066902..ff46e75a36 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -19,8 +19,8 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/util/tensor_format.h" #include "tensorflow/core/util/mkl_util.h" +#include "tensorflow/core/util/tensor_format.h" #include "mkldnn.hpp" using mkldnn::batch_normalization_backward; -- GitLab From bae74d26f93872374b48c60a73d189df148a6f99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 16 Oct 2018 09:45:45 +0800 Subject: [PATCH 0005/1095] CLN: remove reshape in conv3d, because bias_add has supported 5-dim --- .../python/keras/layers/convolutional.py | 33 ++----------------- .../python/keras/layers/convolutional_test.py | 16 +++++++++ 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/tensorflow/python/keras/layers/convolutional.py b/tensorflow/python/keras/layers/convolutional.py index 58024677ee..f8afa0d430 100644 --- a/tensorflow/python/keras/layers/convolutional.py +++ b/tensorflow/python/keras/layers/convolutional.py @@ -199,21 +199,8 @@ class Conv(Layer): # nn.bias_add does not accept a 1D input tensor. bias = array_ops.reshape(self.bias, (1, self.filters, 1)) outputs += bias - if self.rank == 2: + else: outputs = nn.bias_add(outputs, self.bias, data_format='NCHW') - if self.rank == 3: - # As of Mar 2017, direct addition is significantly slower than - # bias_add when computing gradients. To use bias_add, we collapse Z - # and Y into a single dimension to obtain a 4D input tensor. 
- outputs_shape = outputs.shape.as_list() - if outputs_shape[0] is None: - outputs_shape[0] = -1 - outputs_4d = array_ops.reshape(outputs, - [outputs_shape[0], outputs_shape[1], - outputs_shape[2] * outputs_shape[3], - outputs_shape[4]]) - outputs_4d = nn.bias_add(outputs_4d, self.bias, data_format='NCHW') - outputs = array_ops.reshape(outputs_4d, outputs_shape) else: outputs = nn.bias_add(outputs, self.bias, data_format='NHWC') @@ -1127,24 +1114,10 @@ class Conv3DTranspose(Conv3D): outputs.set_shape(out_shape) if self.use_bias: - outputs_shape = outputs.shape.as_list() - if outputs_shape[0] is None: - outputs_shape[0] = -1 - if self.data_format == 'channels_first': - outputs_4d = array_ops.reshape(outputs, [ - outputs_shape[0], outputs_shape[1], - outputs_shape[2] * outputs_shape[3], outputs_shape[4] - ]) - else: - outputs_4d = array_ops.reshape(outputs, [ - outputs_shape[0], outputs_shape[1] * outputs_shape[2], - outputs_shape[3], outputs_shape[4] - ]) - outputs_4d = nn.bias_add( - outputs_4d, + outputs = nn.bias_add( + outputs, self.bias, data_format=conv_utils.convert_data_format(self.data_format, ndim=4)) - outputs = array_ops.reshape(outputs_4d, outputs_shape) if self.activation is not None: return self.activation(outputs) diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py index 4afddbc8cc..63fb60ebaf 100644 --- a/tensorflow/python/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/layers/convolutional_test.py @@ -336,6 +336,14 @@ class Conv3DTransposeTest(test.TestCase): self.assertEqual(layer.kernel.constraint, k_constraint) self.assertEqual(layer.bias.constraint, b_constraint) + def test_conv3dtranspose_dynamic_shape(self): + with self.session(use_gpu=True): + # Won't raise error here. 
+ layer = keras.layers.Conv3DTranspose(3, 3, data_format='channels_last') + layer.build((None, None, None, None, 1)) + layer1 = keras.layers.Conv3DTranspose(3, 3, data_format='channels_first') + layer1.build((None, 1, None, None, None)) + class SeparableConv1DTest(test.TestCase): @@ -557,6 +565,14 @@ class Conv3DTest(test.TestCase): self.assertEqual(layer.kernel.constraint, k_constraint) self.assertEqual(layer.bias.constraint, b_constraint) + def test_conv3d_dynamic_shape(self): + with self.session(use_gpu=True): + # Won't raise error here. + layer = keras.layers.Conv3D(3, 3, data_format='channels_last') + layer.build((None, None, None, None, 1)) + layer1 = keras.layers.Conv3D(3, 3, data_format='channels_first') + layer1.build((None, 1, None, None, None)) + class ZeroPaddingTest(test.TestCase): -- GitLab From 65344d0a7ebd21c71b3f3ed7cb091541504b659a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 1 Nov 2018 09:36:09 +0800 Subject: [PATCH 0006/1095] TST: use testing_utils.layer_test instead --- .../python/keras/layers/convolutional_test.py | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py index 63fb60ebaf..7d8051e596 100644 --- a/tensorflow/python/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/layers/convolutional_test.py @@ -339,10 +339,16 @@ class Conv3DTransposeTest(test.TestCase): def test_conv3dtranspose_dynamic_shape(self): with self.session(use_gpu=True): # Won't raise error here. 
- layer = keras.layers.Conv3DTranspose(3, 3, data_format='channels_last') - layer.build((None, None, None, None, 1)) - layer1 = keras.layers.Conv3DTranspose(3, 3, data_format='channels_first') - layer1.build((None, 1, None, None, None)) + testing_utils.layer_test( + keras.layers.Conv3DTranspose, + kwargs={'data_format': 'channels_first', + 'filters': 3, 'kernel_size': 3}, + input_shape=(None, 1, None, None, None)) + testing_utils.layer_test( + keras.layers.Conv3DTranspose, + kwargs={'data_format': 'channels_last', + 'filters': 3, 'kernel_size': 3}, + input_shape=(None, None, None, None, 1)) class SeparableConv1DTest(test.TestCase): @@ -568,10 +574,16 @@ class Conv3DTest(test.TestCase): def test_conv3d_dynamic_shape(self): with self.session(use_gpu=True): # Won't raise error here. - layer = keras.layers.Conv3D(3, 3, data_format='channels_last') - layer.build((None, None, None, None, 1)) - layer1 = keras.layers.Conv3D(3, 3, data_format='channels_first') - layer1.build((None, 1, None, None, None)) + testing_utils.layer_test( + keras.layers.Conv3D, + kwargs={'data_format': 'channels_first', + 'filters': 3, 'kernel_size': 3}, + input_shape=(None, 1, None, None, None)) + testing_utils.layer_test( + keras.layers.Conv3D, + kwargs={'data_format': 'channels_last', + 'filters': 3, 'kernel_size': 3}, + input_shape=(None, None, None, None, 1)) class ZeroPaddingTest(test.TestCase): -- GitLab From 35f20ca9e794b455777a52bf70b5f7d79ae60455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 1 Nov 2018 10:11:14 +0800 Subject: [PATCH 0007/1095] TST: add input_data --- .../python/keras/layers/convolutional_test.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py index 7d8051e596..737e12a2bd 100644 --- a/tensorflow/python/keras/layers/convolutional_test.py +++ 
b/tensorflow/python/keras/layers/convolutional_test.py @@ -337,18 +337,21 @@ class Conv3DTransposeTest(test.TestCase): self.assertEqual(layer.bias.constraint, b_constraint) def test_conv3dtranspose_dynamic_shape(self): + input_data = np.random.random((1, 3, 3, 3, 3)) with self.session(use_gpu=True): # Won't raise error here. testing_utils.layer_test( keras.layers.Conv3DTranspose, kwargs={'data_format': 'channels_first', 'filters': 3, 'kernel_size': 3}, - input_shape=(None, 1, None, None, None)) + input_shape=(None, 3, None, None, None), + input_data=input_data) testing_utils.layer_test( keras.layers.Conv3DTranspose, kwargs={'data_format': 'channels_last', 'filters': 3, 'kernel_size': 3}, - input_shape=(None, None, None, None, 1)) + input_shape=(None, None, None, None, 3), + input_data=input_data) class SeparableConv1DTest(test.TestCase): @@ -572,18 +575,21 @@ class Conv3DTest(test.TestCase): self.assertEqual(layer.bias.constraint, b_constraint) def test_conv3d_dynamic_shape(self): + input_data = np.random.random((1, 3, 3, 3, 3)) with self.session(use_gpu=True): # Won't raise error here. 
testing_utils.layer_test( keras.layers.Conv3D, kwargs={'data_format': 'channels_first', 'filters': 3, 'kernel_size': 3}, - input_shape=(None, 1, None, None, None)) + input_shape=(None, 3, None, None, None), + input_data=input_data) testing_utils.layer_test( keras.layers.Conv3D, kwargs={'data_format': 'channels_last', 'filters': 3, 'kernel_size': 3}, - input_shape=(None, None, None, None, 1)) + input_shape=(None, None, None, None, 3), + input_data=input_data) class ZeroPaddingTest(test.TestCase): -- GitLab From 8de3a6ee6c7b498e32e8c22c6631a3c0a7a4af86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 1 Nov 2018 10:35:48 +0800 Subject: [PATCH 0008/1095] TST: data_format=first only run for gpu --- .../python/keras/layers/convolutional_test.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/keras/layers/convolutional_test.py b/tensorflow/python/keras/layers/convolutional_test.py index 737e12a2bd..ecee080937 100644 --- a/tensorflow/python/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/layers/convolutional_test.py @@ -337,21 +337,22 @@ class Conv3DTransposeTest(test.TestCase): self.assertEqual(layer.bias.constraint, b_constraint) def test_conv3dtranspose_dynamic_shape(self): - input_data = np.random.random((1, 3, 3, 3, 3)) + input_data = np.random.random((1, 3, 3, 3, 3)).astype(np.float32) with self.session(use_gpu=True): # Won't raise error here. 
- testing_utils.layer_test( - keras.layers.Conv3DTranspose, - kwargs={'data_format': 'channels_first', - 'filters': 3, 'kernel_size': 3}, - input_shape=(None, 3, None, None, None), - input_data=input_data) testing_utils.layer_test( keras.layers.Conv3DTranspose, kwargs={'data_format': 'channels_last', 'filters': 3, 'kernel_size': 3}, input_shape=(None, None, None, None, 3), input_data=input_data) + if test.is_gpu_available(cuda_only=True): + testing_utils.layer_test( + keras.layers.Conv3DTranspose, + kwargs={'data_format': 'channels_first', + 'filters': 3, 'kernel_size': 3}, + input_shape=(None, 3, None, None, None), + input_data=input_data) class SeparableConv1DTest(test.TestCase): @@ -575,21 +576,22 @@ class Conv3DTest(test.TestCase): self.assertEqual(layer.bias.constraint, b_constraint) def test_conv3d_dynamic_shape(self): - input_data = np.random.random((1, 3, 3, 3, 3)) + input_data = np.random.random((1, 3, 3, 3, 3)).astype(np.float32) with self.session(use_gpu=True): # Won't raise error here. 
- testing_utils.layer_test( - keras.layers.Conv3D, - kwargs={'data_format': 'channels_first', - 'filters': 3, 'kernel_size': 3}, - input_shape=(None, 3, None, None, None), - input_data=input_data) testing_utils.layer_test( keras.layers.Conv3D, kwargs={'data_format': 'channels_last', 'filters': 3, 'kernel_size': 3}, input_shape=(None, None, None, None, 3), input_data=input_data) + if test.is_gpu_available(cuda_only=True): + testing_utils.layer_test( + keras.layers.Conv3D, + kwargs={'data_format': 'channels_first', + 'filters': 3, 'kernel_size': 3}, + input_shape=(None, 3, None, None, None), + input_data=input_data) class ZeroPaddingTest(test.TestCase): -- GitLab From 42321707242771cf28deb1d577dfdd6a17e9eae9 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Mon, 12 Nov 2018 15:24:22 -0800 Subject: [PATCH 0009/1095] fix issues related to clang format check --- .../core/kernels/mkl_fused_batch_norm_op.cc | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index ff46e75a36..685db657e2 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -13,15 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ #ifdef INTEL_MKL - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "mkldnn.hpp" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/util/mkl_util.h" #include "tensorflow/core/util/tensor_format.h" -#include "mkldnn.hpp" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" using mkldnn::batch_normalization_backward; using mkldnn::batch_normalization_forward; @@ -705,9 +704,9 @@ class MklFusedBatchNormOp : public OpKernel { std::memcpy(batch_variance_data, variance_data, depth_ * sizeof(T)); } } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); OP_REQUIRES_OK( context, errors::Aborted("Operation received an exception:", error_msg)); @@ -1029,9 +1028,9 @@ class MklFusedBatchNormGradOp : public OpKernel { reinterpret_cast(diff_weights_data + depth_), depth_ * sizeof(T)); } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); OP_REQUIRES_OK( context, errors::Aborted("Operation received an exception:", error_msg)); -- GitLab From d45cd461242e3a27e505a67c17d5ddb7f4a84641 Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Fri, 30 Nov 2018 14:46:12 +0100 Subject: [PATCH 
0010/1095] Add drop_remainder argument to bucket_by_sequence_length `tf.data.experimental.bucket_by_sequence_length` does not allow to drop the last batch in case it has fewer than `batch_size` elements. This patch does implement `drop_remainder` for `bucket_by_sequence_length` to enable this behaviour. `drop_remainder` is optional and set to `False` by default to maintain compatibility. --- tensorflow/python/data/experimental/ops/grouping.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/data/experimental/ops/grouping.py b/tensorflow/python/data/experimental/ops/grouping.py index db10ea3b7f..71e4b3391f 100644 --- a/tensorflow/python/data/experimental/ops/grouping.py +++ b/tensorflow/python/data/experimental/ops/grouping.py @@ -130,7 +130,8 @@ def bucket_by_sequence_length(element_length_func, padded_shapes=None, padding_values=None, pad_to_bucket_boundary=False, - no_padding=False): + no_padding=False, + drop_remainder=False): """A transformation that buckets elements in a `Dataset` by length. Elements of the `Dataset` are grouped together by length and then are padded @@ -160,6 +161,10 @@ def bucket_by_sequence_length(element_length_func, any elements with length longer than `max(bucket_boundaries)`. no_padding: `bool`, indicates whether to pad the batch features (features need to be either of type `tf.SparseTensor` or of same shape). + drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing + whether the last batch should be dropped in the case its has fewer than + batch_size` elements; the default behavior is not to drop the smaller + batch. 
Returns: A `Dataset` transformation function, which can be passed to @@ -209,7 +214,7 @@ def bucket_by_sequence_length(element_length_func, """Batch elements in dataset.""" batch_size = window_size_fn(bucket_id) if no_padding: - return grouped_dataset.batch(batch_size) + return grouped_dataset.batch(batch_size, drop_remainder=drop_remainder) none_filler = None if pad_to_bucket_boundary: err_msg = ("When pad_to_bucket_boundary=True, elements must have " @@ -227,7 +232,8 @@ def bucket_by_sequence_length(element_length_func, shapes = make_padded_shapes( padded_shapes or grouped_dataset.output_shapes, none_filler=none_filler) - return grouped_dataset.padded_batch(batch_size, shapes, padding_values) + return grouped_dataset.padded_batch(batch_size, shapes, padding_values, + drop_remainder=drop_remainder) def _apply_fn(dataset): return dataset.apply( -- GitLab From 916c5238d864e04861a58b3f77b9d29cca03b0e0 Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Sun, 2 Dec 2018 18:27:55 +0100 Subject: [PATCH 0011/1095] Add drop_reminder support to the test of bucketing of sparse tensors --- .../bucket_by_sequence_length_test.py | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py index af20e50fb9..fb2d1cf63c 100644 --- a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py @@ -274,11 +274,16 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): dataset = dataset.map(_to_sparse_tensor) return dataset - def _compute_expected_batches(): + def _compute_expected_batches(drop_remainder): """Computes expected batch outputs and stores in a set.""" all_expected_sparse_tensors = set() for bucket_start_len in range(min_len, max_len, bucket_size): - for 
batch_offset in range(0, bucket_size, batch_size): + if drop_remainder: + batch_offsets = [0] + else: + batch_offsets = range(0, bucket_size, batch_size) + + for batch_offset in batch_offsets: batch_start_len = bucket_start_len + batch_offset batch_end_len = min(batch_start_len + batch_size, bucket_start_len + bucket_size) @@ -306,16 +311,18 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): all_sparse_tensors.add(sprs_tensor) return all_sparse_tensors - dataset = _build_dataset() - boundaries = range(min_len + bucket_size + 1, max_len, bucket_size) - dataset = dataset.apply(grouping.bucket_by_sequence_length( - _element_length_fn, - boundaries, - [batch_size] * (len(boundaries) + 1), - no_padding=True)) - batches = _compute_batches(dataset) - expected_batches = _compute_expected_batches() - self.assertEqual(batches, expected_batches) + for drop_remainder in (True, False): + dataset = _build_dataset() + boundaries = range(min_len + bucket_size + 1, max_len, bucket_size) + dataset = dataset.apply(grouping.bucket_by_sequence_length( + _element_length_fn, + boundaries, + [batch_size] * (len(boundaries) + 1), + no_padding=True, + drop_remainder=drop_remainder)) + batches = _compute_batches(dataset) + expected_batches = _compute_expected_batches() + self.assertEqual(batches, expected_batches) if __name__ == "__main__": -- GitLab From d372f19f132e8f037390a9a2a6fde7bafa2620b9 Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Sun, 2 Dec 2018 18:29:03 +0100 Subject: [PATCH 0012/1095] Add a drop_reminder version of the testBucket test Squeezing both cases into a single test would make it way too complicated. Therefore, I created a separate test. 
--- .../bucket_by_sequence_length_test.py | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py index fb2d1cf63c..7b2e922d55 100644 --- a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py @@ -71,6 +71,117 @@ def _get_record_shape(sparse): class BucketBySequenceLengthTest(test_base.DatasetTestBase): + def testBucketDropReminder(self): + + boundaries = [10, 20, 30] + batch_sizes = [10, 8, 4, 2] + lengths = [8, 13, 25, 35] + + n_bucket_elements = [28, 7, 6, 5] + n_expected_batches = 5 + + # Expected sequence lengths of the individual batches. + expected_lengths = [] + + # Expected sum of all batches with an equal sequence length. + # : + expected_sums = dict() + + # Expected batch sizes of batches depending on the sequence length. + # : [batch1_size, ..., batchN_size] + expected_batch_sizes = dict() + + for length, batch_size, bucket_elements in zip(lengths, batch_sizes, n_bucket_elements): + # Calculate the expected sum across all batches of a specific sequence length. + expected_sums[length] = (bucket_elements - bucket_elements % batch_size) * length + # Calculate the expected occurrence of individual batch sizes. + expected_batch_sizes[length] = [batch_size] * (bucket_elements // batch_size) + # Calculate the expected occurence of individual sequence lengths. + expected_lengths.extend([length] * (bucket_elements // batch_size)) + + def build_dataset(sparse): + def _generator(): + # Produce 1 batch for each bucket + elements = [] + for bucket_elements, length in zip(n_bucket_elements, lengths): + # Using only full sequences (opposed to the strategy employed in `testBucket`) makes + # checking the sum a lot easier. 
+ record_len = length + for _ in range(bucket_elements): + elements.append([1] * record_len) + random.shuffle(elements) + for el in elements: + yield (_format_record(el, sparse),) + dataset = dataset_ops.Dataset.from_generator( + _generator, + (_get_record_type(sparse),), + (_get_record_shape(sparse),)) + if sparse: + dataset = dataset.map(lambda x: (_to_sparse_tensor(x),)) + return dataset + + def _test_bucket_by_padding(no_padding): + dataset = build_dataset(sparse=no_padding) + dataset = dataset.apply( + grouping.bucket_by_sequence_length( + _element_length_fn, + boundaries, + batch_sizes, + no_padding=no_padding, + drop_remainder=True)) + batch, = dataset.make_one_shot_iterator().get_next() + + with self.cached_session() as sess: + batches = [] + for _ in range(n_expected_batches): + batches.append(self.evaluate(batch)) + with self.assertRaises(errors.OutOfRangeError): + self.evaluate(batch) + + generated_lengths = [] + + # : + generated_sums = dict() + + # : [, ...] + generated_batch_sizes = dict() + + for length, batch_size, bucket_elements in zip(lengths, batch_sizes, n_bucket_elements): + # Initialize the sum across all batches. + expected_sums[length] = 0 + # Initialize the individual batch sizes. + expected_batch_sizes[length] = [] + + for batch in batches: + shape = batch.dense_shape if no_padding else batch.shape + length = shape[1] + generated_lengths.append(length) + + batch_size = shape[0] + generated_batch_sizes[length].append(batch_size) + + batch_sum = batch.values.sum() if no_padding else batch.sum() + generated_sums[length] += batch_sum + + for l in lengths: + # Make sure the sum of the batch contents is correct for the individual sequence lengths. + self.assertEqual(generated_sums[l], expected_sums[l], + 'Tensor sums did not match! expected: {}, generated: {}' + .format(expected_sums, generated_sums)) + + # Make sure the individual batch sizes are generated as expected. 
+ self.assertEqual(sorted(generated_batch_sizes[l]), sorted(expected_batch_sizes[l]), + 'Batch-sizes did not match! expected: {}, generated: {}' + .format(sorted(expected_batch_sizes[l]), sorted(generated_batch_sizes[l]))) + + # Make sure the generated sequence lengths appear as often as expected. + self.assertEqual(sorted(generated_lengths), sorted(expected_lengths), + 'The generated sequence lengths did not match! expected: {}, generated: {}' + .format(sorted(expected_lengths), sorted(generated_lengths))) + + for no_padding in (True, False): + _test_bucket_by_padding(no_padding) + def testBucket(self): boundaries = [10, 20, 30] -- GitLab From 68426789549cb9d2edc8726fc5edabf4f221bd9b Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Sun, 2 Dec 2018 18:40:12 +0100 Subject: [PATCH 0013/1095] Fix pylint warnings * Missing argument drop_remainder in the testBucketSparse test. * Insert line breaks --- .../bucket_by_sequence_length_test.py | 45 ++++++++++++------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py index 7b2e922d55..5b96101b7c 100644 --- a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py @@ -91,11 +91,15 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): # : [batch1_size, ..., batchN_size] expected_batch_sizes = dict() - for length, batch_size, bucket_elements in zip(lengths, batch_sizes, n_bucket_elements): + for length, batch_size, bucket_elements in zip(lengths, + batch_sizes, + n_bucket_elements): # Calculate the expected sum across all batches of a specific sequence length. 
- expected_sums[length] = (bucket_elements - bucket_elements % batch_size) * length + expected_sums[length] = \ + (bucket_elements - bucket_elements % batch_size) * length # Calculate the expected occurrence of individual batch sizes. - expected_batch_sizes[length] = [batch_size] * (bucket_elements // batch_size) + expected_batch_sizes[length] = \ + [batch_size] * (bucket_elements // batch_size) # Calculate the expected occurence of individual sequence lengths. expected_lengths.extend([length] * (bucket_elements // batch_size)) @@ -146,11 +150,13 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): # : [, ...] generated_batch_sizes = dict() - for length, batch_size, bucket_elements in zip(lengths, batch_sizes, n_bucket_elements): - # Initialize the sum across all batches. - expected_sums[length] = 0 - # Initialize the individual batch sizes. - expected_batch_sizes[length] = [] + for length, batch_size, bucket_elements in zip(lengths, + batch_sizes, + n_bucket_elements): + # Initialize the sum across all batches. + expected_sums[length] = 0 + # Initialize the individual batch sizes. + expected_batch_sizes[length] = [] for batch in batches: shape = batch.dense_shape if no_padding else batch.shape @@ -165,19 +171,26 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): for l in lengths: # Make sure the sum of the batch contents is correct for the individual sequence lengths. - self.assertEqual(generated_sums[l], expected_sums[l], - 'Tensor sums did not match! expected: {}, generated: {}' + self.assertEqual(generated_sums[l], + expected_sums[l], + 'Tensor sums did not match! ' + 'expected: {}, generated: {}' .format(expected_sums, generated_sums)) # Make sure the individual batch sizes are generated as expected. - self.assertEqual(sorted(generated_batch_sizes[l]), sorted(expected_batch_sizes[l]), - 'Batch-sizes did not match! 
expected: {}, generated: {}' - .format(sorted(expected_batch_sizes[l]), sorted(generated_batch_sizes[l]))) + self.assertEqual(sorted(generated_batch_sizes[l]), + sorted(expected_batch_sizes[l]), + 'Batch-sizes did not match! ' + 'expected: {}, generated: {}' + .format(sorted(expected_batch_sizes[l]), + sorted(generated_batch_sizes[l]))) # Make sure the generated sequence lengths appear as often as expected. self.assertEqual(sorted(generated_lengths), sorted(expected_lengths), - 'The generated sequence lengths did not match! expected: {}, generated: {}' - .format(sorted(expected_lengths), sorted(generated_lengths))) + 'The generated sequence lengths did not match! ' + 'expected: {}, generated: {}' + .format(sorted(expected_lengths), + sorted(generated_lengths))) for no_padding in (True, False): _test_bucket_by_padding(no_padding) @@ -432,7 +445,7 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): no_padding=True, drop_remainder=drop_remainder)) batches = _compute_batches(dataset) - expected_batches = _compute_expected_batches() + expected_batches = _compute_expected_batches(drop_remainder) self.assertEqual(batches, expected_batches) -- GitLab From 4ddd2ab07fdfea81a53d110fee19bb88c41251a0 Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Mon, 3 Dec 2018 13:58:58 +0100 Subject: [PATCH 0014/1095] Fix copy paste bug Pasted the wrong variable names from the docker test image. 
--- .../kernel_tests/bucket_by_sequence_length_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py index 5b96101b7c..fab79619a0 100644 --- a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py @@ -154,9 +154,9 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): batch_sizes, n_bucket_elements): # Initialize the sum across all batches. - expected_sums[length] = 0 + generated_sums[length] = 0 # Initialize the individual batch sizes. - expected_batch_sizes[length] = [] + generated_batch_sizes[length] = [] for batch in batches: shape = batch.dense_shape if no_padding else batch.shape -- GitLab From 562b078b836b215b761fc91a177937cfcbdd0ea0 Mon Sep 17 00:00:00 2001 From: Nayana-ibm Date: Thu, 6 Dec 2018 11:13:12 -0500 Subject: [PATCH 0015/1095] resolve ImportError: cannot import name cloud on s390x - skip import cloud --- tensorflow/contrib/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 4f1a2a5693..a23e331f4f 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -20,13 +20,14 @@ from __future__ import division from __future__ import print_function import os +import platform # Add projects here, they will show up under tf.contrib. 
from tensorflow.contrib import autograph from tensorflow.contrib import batching from tensorflow.contrib import bayesflow from tensorflow.contrib import checkpoint -if os.name != "nt": +if os.name != "nt" and platform.machine() != "s390x": from tensorflow.contrib import cloud from tensorflow.contrib import cluster_resolver from tensorflow.contrib import coder -- GitLab From aff52125a0d172f4c3ae8ea7c170ca0f6d97e97d Mon Sep 17 00:00:00 2001 From: Andy Craze Date: Fri, 7 Dec 2018 20:37:37 -0800 Subject: [PATCH 0016/1095] Update tables_initializer to link to guide Resolves #20629 --- tensorflow/python/ops/lookup_ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index 758cb8041d..9302696a45 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -64,6 +64,8 @@ def initialize_all_tables(name="init_all_tables"): @tf_export(v1=["initializers.tables_initializer", "tables_initializer"]) def tables_initializer(name="init_all_tables"): """Returns an Op that initializes all tables of the default graph. + See the [Low Level Intro](https://www.tensorflow.org/guide/low_level_intro#feature_columns) + guide for an example of usage. Args: name: Optional name for the initialization op.
-- GitLab From 035955852708fba07565ead298cd54ad64ab1a55 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Sun, 9 Dec 2018 18:33:50 +0800 Subject: [PATCH 0017/1095] static lib name change --- tensorflow/contrib/cmake/external/abseil_cpp.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/abseil_cpp.cmake b/tensorflow/contrib/cmake/external/abseil_cpp.cmake index 8b76f37858..539c5cbb76 100644 --- a/tensorflow/contrib/cmake/external/abseil_cpp.cmake +++ b/tensorflow/contrib/cmake/external/abseil_cpp.cmake @@ -48,7 +48,7 @@ else (systemlib_ABSEIL_CPP) set(abseil_cpp_STATIC_LIBRARIES ${abseil_cpp_BUILD}/absl/base/Release/absl_base.lib ${abseil_cpp_BUILD}/absl/base/Release/absl_dynamic_annotations.lib - ${abseil_cpp_BUILD}/absl/base/Release/absl_internal_malloc_internal.lib + ${abseil_cpp_BUILD}/absl/base/Release/absl_malloc_internal.lib ${abseil_cpp_BUILD}/absl/strings/Release/absl_strings.lib ${abseil_cpp_BUILD}/absl/strings/Release/str_format_internal.lib ${abseil_cpp_BUILD}/absl/types/Release/absl_bad_optional_access.lib) @@ -94,6 +94,8 @@ else (systemlib_ABSEIL_CPP) ) include_directories(${abseil_cpp_INCLUDE_DIR}) + message(STATUS ${abseil_cpp_INCLUDE_DIR}) + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${abseil_cpp_STATIC_LIBRARIES}) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp_build) -- GitLab From 5a4871af05392c2e5744515a4c67bc6f0f420943 Mon Sep 17 00:00:00 2001 From: fo40225 Date: Sat, 1 Sep 2018 00:58:14 +0800 Subject: [PATCH 0018/1095] fix AttributeError: 'module' object has no attribute '???'on windows python 2.7 --- tensorflow/api_template_v1.__init__.py | 3 ++- tensorflow/contrib/cmake/python_modules.txt | 3 +++ tensorflow/contrib/cmake/tf_python.cmake | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py index 65bdb6cb1b..b9b21bad50 100644 --- a/tensorflow/api_template_v1.__init__.py +++ 
b/tensorflow/api_template_v1.__init__.py @@ -40,7 +40,8 @@ if '__all__' in vars(): vars()['__all__'].append('contrib') from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top -app.flags = flags # pylint: disable=undefined-variable +from tensorflow.python.platform import app # pylint: disable=g-import-not-at-top +app.flags = flags # Make sure directory containing top level submodules is in # the __path__ so that "from tensorflow.foo import bar" works. diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index 96160568fa..21ae9a08a6 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -1,6 +1,9 @@ # python_sanity_test.py will complain about invalid or missing entries # problematic entries can be commented for temporary whitelisting tensorflow +tensorflow/compiler +tensorflow/compiler/xla +tensorflow/compiler/xla/service tensorflow/core tensorflow/core/example tensorflow/core/framework diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 8faccf8d55..1fe8795ddf 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -802,6 +802,7 @@ add_custom_command( # tensorflow/__init__.py depends on files generated in this step. So, remove it while # this step is running since the files aren't there yet. COMMAND ${CMAKE_COMMAND} -E remove -f ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/__init__.py + COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/__init__.py # Run create_python_api.py to generate API init files. 
COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${CMAKE_CURRENT_BINARY_DIR}/tf_python "${PY_RUNTIME_ENV}" ${PYTHON_EXECUTABLE} -- GitLab From cc69b82bbcc68c21db839cca4f6fec043f6005aa Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Sun, 9 Dec 2018 19:08:32 +0800 Subject: [PATCH 0019/1095] abseil use master branch --- tensorflow/contrib/cmake/external/abseil_cpp.cmake | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/cmake/external/abseil_cpp.cmake b/tensorflow/contrib/cmake/external/abseil_cpp.cmake index 539c5cbb76..f64b60fd5e 100644 --- a/tensorflow/contrib/cmake/external/abseil_cpp.cmake +++ b/tensorflow/contrib/cmake/external/abseil_cpp.cmake @@ -39,8 +39,8 @@ else (systemlib_ABSEIL_CPP) include (ExternalProject) set(abseil_cpp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp_build) - set(abseil_cpp_URL https://github.com/abseil/abseil-cpp/archive/e01d95528ea2137a4a27a88d1f57c6cb260aafed.tar.gz) - set(abseil_cpp_HASH SHA256=84043ed402d2a2a6ba4cdddb7e85118b1158fd81fe4ac3a14adc343d054c1e2e) + set(abseil_cpp_URL https://github.com/abseil/abseil-cpp.git) + set(abseil_cpp_TAG master) set(abseil_cpp_BUILD ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp_build) if(WIN32) @@ -48,7 +48,7 @@ else (systemlib_ABSEIL_CPP) set(abseil_cpp_STATIC_LIBRARIES ${abseil_cpp_BUILD}/absl/base/Release/absl_base.lib ${abseil_cpp_BUILD}/absl/base/Release/absl_dynamic_annotations.lib - ${abseil_cpp_BUILD}/absl/base/Release/absl_malloc_internal.lib + ${abseil_cpp_BUILD}/absl/base/Release/absl_internal_malloc_internal.lib ${abseil_cpp_BUILD}/absl/strings/Release/absl_strings.lib ${abseil_cpp_BUILD}/absl/strings/Release/str_format_internal.lib ${abseil_cpp_BUILD}/absl/types/Release/absl_bad_optional_access.lib) @@ -79,8 +79,7 @@ else (systemlib_ABSEIL_CPP) ExternalProject_Add(abseil_cpp_build PREFIX abseil_cpp - URL ${abseil_cpp_URL} - URL_HASH ${abseil_cpp_HASH} + GIT_REPOSITORY ${abseil_cpp_URL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" BUILD_IN_SOURCE 
1 BUILD_BYPRODUCTS ${abseil_cpp_STATIC_LIBRARIES} -- GitLab From 956b77a2a8f8db7a57b818bc0c06669fd395d56d Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Sun, 9 Dec 2018 19:31:05 +0800 Subject: [PATCH 0020/1095] abseil lib linkage update --- tensorflow/contrib/cmake/external/abseil_cpp.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/cmake/external/abseil_cpp.cmake b/tensorflow/contrib/cmake/external/abseil_cpp.cmake index f64b60fd5e..eefa7d3f03 100644 --- a/tensorflow/contrib/cmake/external/abseil_cpp.cmake +++ b/tensorflow/contrib/cmake/external/abseil_cpp.cmake @@ -49,6 +49,8 @@ else (systemlib_ABSEIL_CPP) ${abseil_cpp_BUILD}/absl/base/Release/absl_base.lib ${abseil_cpp_BUILD}/absl/base/Release/absl_dynamic_annotations.lib ${abseil_cpp_BUILD}/absl/base/Release/absl_internal_malloc_internal.lib + ${abseil_cpp_BUILD}/absl/base/Release/absl_internal_throw_delegate.lib + ${abseil_cpp_BUILD}/absl/numeric/Release/absl_int128.lib ${abseil_cpp_BUILD}/absl/strings/Release/absl_strings.lib ${abseil_cpp_BUILD}/absl/strings/Release/str_format_internal.lib ${abseil_cpp_BUILD}/absl/types/Release/absl_bad_optional_access.lib) -- GitLab From 7578e120de2a3a5282ced8d41881f19363f83466 Mon Sep 17 00:00:00 2001 From: Dan Jarvis Date: Thu, 23 Nov 2017 13:06:02 -0500 Subject: [PATCH 0021/1095] Fix crash on closing the app when classifier failed to initialize When testing on an API 21 emulator, the classifier fails to initialize. `E/TfLiteCameraDemo: Failed to initialize an image classifier.` In this situation, the app crashes when pressing Back to exit. Here's the cause: ``` java.lang.NullPointerException: Attempt to invoke virtual method 'void com.example.android.tflitecamerademo.ImageClassifier.close()' on a null object reference at com.example.android.tflitecamerademo.Camera2BasicFragment.onDestroy(Camera2BasicFragment.java:331) at android.app.Fragment.performDestroy(Fragment.java:2266) ``` The fix is to check for null before calling `.close()`. 
I'll investigate why the classifier is failing to initialize separately. :-) --- .../android/tflitecamerademo/Camera2BasicFragment.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java index 165d335101..a7b3440536 100644 --- a/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java +++ b/tensorflow/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java @@ -476,7 +476,9 @@ public class Camera2BasicFragment extends Fragment @Override public void onDestroy() { - classifier.close(); + if (classifier != null) { + classifier.close(); + } super.onDestroy(); } -- GitLab From 16380e05087b41c39547e3f05e1ce85cea44efb2 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 11 Dec 2018 00:40:26 +0000 Subject: [PATCH 0022/1095] Fix warning caused by to_int32 While running tf.keras I noticed the following warning: ``` WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/math_ops.py:3064: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. ``` This fix fixes the warning caused by deprecated to_int32. 
Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index e2b634ee8f..1d85fb0193 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -3061,8 +3061,8 @@ def reduced_shape(input_shape, axes): input_shape[axes] = 1 return input_shape - input_shape = to_int32(input_shape) # [2, 3, 5, 7] - axes = to_int32(axes) # [1, 2] + input_shape = cast(input_shape, dtypes.int32) # [2, 3, 5, 7] + axes = cast(axes, dtypes.int32) # [1, 2] input_rank = array_ops.size(input_shape) # 4 axes = (axes + input_rank) % input_rank -- GitLab From 16f454f95e9d9823145d6b00a7e007afd0ea569b Mon Sep 17 00:00:00 2001 From: Luke Han Date: Tue, 11 Dec 2018 11:03:05 +0900 Subject: [PATCH 0023/1095] fix typo in scatter_nd_add docstring --- tensorflow/python/ops/state_ops.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index 3ac69c1c20..25b31c698e 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -462,9 +462,8 @@ def scatter_nd_add(ref, indices, updates, use_locking=False, name=None): updates: A `Tensor`. Must have the same type as `ref`. A tensor of updated values to add to ref. use_locking: An optional `bool`. Defaults to `False`. - An optional bool. Defaults to True. If True, the assignment will - be protected by a lock; otherwise the behavior is undefined, - but may exhibit less contention. + If True, the assignment will be protected by a lock; + otherwise the behavior is undefined, but may exhibit less contention. name: A name for the operation (optional). 
Returns: -- GitLab From f0f7ed323983ab2cb157bf782950b6e2238b9f6b Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Tue, 11 Dec 2018 20:35:50 +0800 Subject: [PATCH 0024/1095] add abseil time library linking --- tensorflow/contrib/cmake/external/abseil_cpp.cmake | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/cmake/external/abseil_cpp.cmake b/tensorflow/contrib/cmake/external/abseil_cpp.cmake index b85fd48f0f..6c6a5df7f7 100644 --- a/tensorflow/contrib/cmake/external/abseil_cpp.cmake +++ b/tensorflow/contrib/cmake/external/abseil_cpp.cmake @@ -31,8 +31,8 @@ if (systemlib_ABSEIL_CPP) message(STATUS " abseil_cpp includes: ${ABSEIL_CPP_INCLUDE_DIR}") message(STATUS " abseil_cpp libraries: ${ABSEIL_CPP_LIBRARIES}") - add_custom_target(abseil_cpp) - list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp) + add_custom_target(abseil_cpp_build) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp_build) else (systemlib_ABSEIL_CPP) @@ -53,6 +53,7 @@ else (systemlib_ABSEIL_CPP) ${abseil_cpp_BUILD}/absl/numeric/Release/absl_int128.lib ${abseil_cpp_BUILD}/absl/strings/Release/absl_strings.lib ${abseil_cpp_BUILD}/absl/strings/Release/str_format_internal.lib + ${abseil_cpp_BUILD}/absl/time/Release/absl_time.lib ${abseil_cpp_BUILD}/absl/types/Release/absl_bad_optional_access.lib) else() set(abseil_cpp_STATIC_LIBRARIES @@ -64,6 +65,7 @@ else (systemlib_ABSEIL_CPP) ${abseil_cpp_BUILD}/absl/numeric/absl_int128.lib ${abseil_cpp_BUILD}/absl/strings/absl_strings.lib ${abseil_cpp_BUILD}/absl/strings/str_format_internal.lib + ${abseil_cpp_BUILD}/absl/time/absl_time.lib ${abseil_cpp_BUILD}/absl/types/absl_bad_optional_access.lib) endif() else() @@ -76,14 +78,18 @@ else (systemlib_ABSEIL_CPP) ${abseil_cpp_BUILD}/absl/numeric/libabsl_int128.a ${abseil_cpp_BUILD}/absl/strings/libabsl_strings.a ${abseil_cpp_BUILD}/absl/strings/libstr_format_internal.a + ${abseil_cpp_BUILD}/absl/time/libabsl_time.a 
${abseil_cpp_BUILD}/absl/types/libabsl_bad_optional_access.a) endif() - ExternalProject_Add(abseil_cpp + ExternalProject_Add(abseil_cpp_build PREFIX abseil_cpp GIT_REPOSITORY ${abseil_cpp_URL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" + BUILD_IN_SOURCE 1 BUILD_BYPRODUCTS ${abseil_cpp_STATIC_LIBRARIES} + BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release + COMMAND ${CMAKE_COMMAND} --build . --config Release INSTALL_COMMAND "" CMAKE_CACHE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE} @@ -96,6 +102,6 @@ else (systemlib_ABSEIL_CPP) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${abseil_cpp_STATIC_LIBRARIES}) - list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp_build) endif (systemlib_ABSEIL_CPP) \ No newline at end of file -- GitLab From 568db5fb164092eaf2863f0e4a2fdb48533d76f9 Mon Sep 17 00:00:00 2001 From: Siju Samuel Date: Mon, 19 Nov 2018 07:20:36 +0530 Subject: [PATCH 0025/1095] Abs, Ceil ops initial --- tensorflow/lite/build_def.bzl | 1 + tensorflow/lite/builtin_ops.h | 1 + .../lite/core/api/flatbuffer_conversions.cc | 2 + .../lite/delegates/nnapi/nnapi_delegate.cc | 20 ++++ .../delegates/nnapi/nnapi_delegate_test.cc | 109 ++++++++++++++++++ .../writer/option_writer_generator.cc | 2 + tensorflow/lite/g3doc/tf_ops_compatibility.md | 22 ++++ tensorflow/lite/kernels/BUILD | 34 ++++++ tensorflow/lite/kernels/abs.cc | 59 ++++++++++ tensorflow/lite/kernels/abs_test.cc | 94 +++++++++++++++ tensorflow/lite/kernels/ceil.cc | 59 ++++++++++ tensorflow/lite/kernels/ceil_test.cc | 83 +++++++++++++ .../internal/optimized/legacy_optimized_ops.h | 12 ++ .../internal/optimized/optimized_ops.h | 16 +++ .../internal/reference/legacy_reference_ops.h | 12 ++ .../internal/reference/reference_ops.h | 20 ++++ tensorflow/lite/kernels/register.cc | 4 + tensorflow/lite/nnapi/NeuralNetworksShim.h | 2 + tensorflow/lite/nnapi_delegate.cc | 6 + tensorflow/lite/schema/schema.fbs | 1 + 
tensorflow/lite/schema/schema_generated.h | 10 +- tensorflow/lite/testing/generate_examples.py | 52 +++++++++ tensorflow/lite/toco/export_tensorflow.cc | 26 +++++ .../propagate_fixed_sizes.cc | 2 + .../reorder_elementwise_unary.cc | 2 + tensorflow/lite/toco/import_tensorflow.cc | 29 +++++ tensorflow/lite/toco/model.h | 22 ++++ tensorflow/lite/toco/tflite/operator.cc | 4 + tensorflow/lite/toco/tflite/operator_test.cc | 2 + .../lite/toco/tflite/whitelisted_flex_ops.cc | 1 + tensorflow/lite/toco/tooling_util.cc | 2 + 31 files changed, 708 insertions(+), 3 deletions(-) create mode 100644 tensorflow/lite/kernels/abs.cc create mode 100644 tensorflow/lite/kernels/abs_test.cc create mode 100644 tensorflow/lite/kernels/ceil.cc create mode 100644 tensorflow/lite/kernels/ceil_test.cc diff --git a/tensorflow/lite/build_def.bzl b/tensorflow/lite/build_def.bzl index c17eddf47b..1539ac788a 100644 --- a/tensorflow/lite/build_def.bzl +++ b/tensorflow/lite/build_def.bzl @@ -228,6 +228,7 @@ def generated_test_models(): "arg_min_max", "avg_pool", "batch_to_space_nd", + "ceil", "concat", "constant", "control_dep", diff --git a/tensorflow/lite/builtin_ops.h b/tensorflow/lite/builtin_ops.h index f97d3ac4bf..0077fade40 100644 --- a/tensorflow/lite/builtin_ops.h +++ b/tensorflow/lite/builtin_ops.h @@ -128,6 +128,7 @@ typedef enum { kTfLiteBuiltinMirrorPad = 100, kTfLiteBuiltinAbs = 101, kTfLiteBuiltinSplitV = 102, + kTfLiteBuiltinCeil = 103, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index c00a0a3a54..8a436c440f 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -664,6 +664,8 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_EQUAL: case BuiltinOperator_EXP: case BuiltinOperator_EXPAND_DIMS: + case BuiltinOperator_ABS: + case BuiltinOperator_CEIL: case 
BuiltinOperator_FLOOR: case BuiltinOperator_GREATER: case BuiltinOperator_GREATER_EQUAL: diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index 4fe07004a8..1a95ac1891 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -638,6 +638,26 @@ class NNAPIDelegateKernel { return nullptr; } break; + case kTfLiteBuiltinCeil: + if (version == 1) { + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + return ANEURALNETWORKS_CEIL; + }; + } else { + return nullptr; + } + break; + case kTfLiteBuiltinAbs: + if (version == 1) { + return [](const NNAPIOpMappingArgs& mapping_args) + -> ANeuralNetworksOperationType { + return ANEURALNETWORKS_ABS; + }; + } else { + return nullptr; + } + break; case kTfLiteBuiltinRelu: if (version == 1) { return [](const NNAPIOpMappingArgs& mapping_args) diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc index ca48af0c95..3050b69028 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc @@ -1024,6 +1024,115 @@ TEST(NNAPIDelegate, FloorMultiDims) { EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 5})); } +class CeilOpModel : public SingleOpModelWithNNAPI { + public: + CeilOpModel(std::initializer_list input_shape, TensorType input_type) { + input_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp(BuiltinOperator_CEIL, BuiltinOptions_NONE, 0); + BuildInterpreter({ + input_shape, + }); + } + + int input() { return input_; } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + private: + int input_; + int output_; +}; + +TEST(NNAPIDelegate, CeilSingleDim) { + CeilOpModel model({2}, TensorType_FLOAT32); + 
model.PopulateTensor(model.input(), {8.5, 0.0}); + model.Invoke(); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({9, 0})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2})); +} + +TEST(NNAPIDelegate, CeilMultiDims) { + CeilOpModel model({2, 1, 1, 5}, TensorType_FLOAT32); + model.PopulateTensor(model.input(), { + 0.0001, + 8.0001, + 0.9999, + 9.9999, + 0.5, + -0.0001, + -8.0001, + -0.9999, + -9.9999, + -0.5, + }); + model.Invoke(); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray({1, 9, 1, 10, 1, 0, -8, 0, -9, 0})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 5})); +} + +class AbsOpModel : public SingleOpModelWithNNAPI { + public: + AbsOpModel(std::initializer_list input_shape, TensorType input_type) { + input_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp(BuiltinOperator_ABS, BuiltinOptions_NONE, 0); + BuildInterpreter({ + input_shape, + }); + } + + int input() { return input_; } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + private: + int input_; + int output_; +}; + +TEST(NNAPIDelegate, AbsSingleDim) { + AbsOpModel model({2}, TensorType_FLOAT32); + model.PopulateTensor(model.input(), {8.5, -2.0}); + model.Invoke(); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({8.5, 2.0})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2})); +} + +TEST(NNAPIDelegate, AbsMultiDims) { + AbsOpModel model({2, 1, 1, 5}, TensorType_FLOAT32); + model.PopulateTensor(model.input(), { + 0.0001, + 8.0001, + 0.9999, + 9.9999, + 0.5, + -0.0001, + -8.0001, + -0.9999, + -9.9999, + -0.5, + }); + model.Invoke(); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray({ + 0.0001, + 8.0001, + 0.9999, + 9.9999, + 0.5, + 0.0001, + 8.0001, + 0.9999, + 9.9999, + 0.5, + })); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 5})); +} + class LocalResponseNormOpModel : public SingleOpModelWithNNAPI { public:
LocalResponseNormOpModel(std::initializer_list input_shape, int radius, diff --git a/tensorflow/lite/experimental/writer/option_writer_generator.cc b/tensorflow/lite/experimental/writer/option_writer_generator.cc index fa360a2f47..d79b6e8ca9 100644 --- a/tensorflow/lite/experimental/writer/option_writer_generator.cc +++ b/tensorflow/lite/experimental/writer/option_writer_generator.cc @@ -160,6 +160,8 @@ class OpOptionData { op_to_option_["EMBEDDING_LOOKUP"] = ""; // TODO(aselle): maybe something else. op_to_option_["FLOOR"] = ""; + op_to_option_["CEIL"] = ""; + op_to_option_["ABS"] = ""; op_to_option_["HASHTABLE_LOOKUP"] = ""; // TODO(aselle): maybe something else. op_to_option_["LOGISTIC"] = ""; diff --git a/tensorflow/lite/g3doc/tf_ops_compatibility.md b/tensorflow/lite/g3doc/tf_ops_compatibility.md index dcfda72137..dc6ba8a463 100644 --- a/tensorflow/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/lite/g3doc/tf_ops_compatibility.md @@ -362,6 +362,28 @@ Outputs { } ``` +**CEIL** + +``` +inputs { + 0: tensor +} +outputs: { + 0: result of computing element-wise ceil of the input tensor +} +``` + +**ABS** + +``` +inputs { + 0: tensor +} +outputs: { + 0: result of computing element-wise absolute value of the input tensor +} +``` + **FULLY_CONNECTED** ``` diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index bad1c4aebf..f66f69870f 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -158,6 +158,7 @@ cc_library( cc_library( name = "builtin_op_kernels", srcs = [ + "abs.cc", "activations.cc", "add.cc", "arg_min_max.cc", @@ -167,6 +168,7 @@ cc_library( "bidirectional_sequence_lstm.cc", "bidirectional_sequence_rnn.cc", "cast.cc", + "ceil.cc", "comparisons.cc", "concatenation.cc", "conv.cc", @@ -599,6 +601,38 @@ tf_cc_test( ], ) +tf_cc_test( + name = "abs_test", + size = "small", + srcs = ["abs_test.cc"], + tags = [ + "no_oss", + "tflite_not_portable_ios", + ], + deps = [ + ":builtin_ops", + 
"//tensorflow/lite:framework", + "//tensorflow/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + +tf_cc_test( + name = "ceil_test", + size = "small", + srcs = ["ceil_test.cc"], + tags = [ + "no_oss", + "tflite_not_portable_ios", + ], + deps = [ + ":builtin_ops", + "//tensorflow/lite:framework", + "//tensorflow/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "elementwise_test", size = "small", diff --git a/tensorflow/lite/kernels/abs.cc b/tensorflow/lite/kernels/abs.cc new file mode 100644 index 0000000000..b44f9de630 --- /dev/null +++ b/tensorflow/lite/kernels/abs.cc @@ -0,0 +1,59 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/c/c_api_internal.h" +#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/lite/kernels/internal/tensor.h" +#include "tensorflow/lite/kernels/kernel_util.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace abs { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32); + output->type = input->type; + TfLiteIntArray* output_size = TfLiteIntArrayCopy(input->dims); + return context->ResizeTensor(context, output, output_size); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + optimized_ops::Abs(GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); + + return kTfLiteOk; +} +} // namespace abs + +TfLiteRegistration* Register_ABS() { + static TfLiteRegistration r = {/*init=*/nullptr, + /*free=*/nullptr, abs::Prepare, abs::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/lite/kernels/abs_test.cc b/tensorflow/lite/kernels/abs_test.cc new file mode 100644 index 0000000000..5e3f95c43a --- /dev/null +++ b/tensorflow/lite/kernels/abs_test.cc @@ -0,0 +1,94 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/register.h" +#include "tensorflow/lite/kernels/test_util.h" +#include "tensorflow/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class AbsOpModel : public SingleOpModel { + public: + AbsOpModel(std::initializer_list input_shape, TensorType input_type) { + input_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp(BuiltinOperator_ABS, BuiltinOptions_NONE, 0); + BuildInterpreter({ + input_shape, + }); + } + + int input() { return input_; } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + private: + int input_; + int output_; +}; + +TEST(AbsOpTest, SingleDim) { + AbsOpModel model({2}, TensorType_FLOAT32); + model.PopulateTensor(model.input(), {8.5, -2.0}); + model.Invoke(); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({8.5, 2.0})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2})); +} + +TEST(AbsOpTest, MultiDims) { + AbsOpModel model({2, 1, 1, 5}, TensorType_FLOAT32); + model.PopulateTensor(model.input(), { + 0.0001, + 8.0001, + 0.9999, + 9.9999, + 0.5, + -0.0001, + -8.0001, + -0.9999, + -9.9999, + -0.5, + }); + model.Invoke(); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray({ + 0.0001, + 8.0001, + 0.9999, + 9.9999, + 0.5, + 0.0001, + 8.0001, + 0.9999, + 9.9999, + 0.5, + })); + EXPECT_THAT(model.GetOutputShape(), 
ElementsAreArray({2, 1, 1, 5})); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/lite/kernels/ceil.cc b/tensorflow/lite/kernels/ceil.cc new file mode 100644 index 0000000000..e0ea061f25 --- /dev/null +++ b/tensorflow/lite/kernels/ceil.cc @@ -0,0 +1,59 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/lite/c/c_api_internal.h" +#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/lite/kernels/internal/tensor.h" +#include "tensorflow/lite/kernels/kernel_util.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace ceil { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32); + output->type = input->type; + TfLiteIntArray* output_size = TfLiteIntArrayCopy(input->dims); + return context->ResizeTensor(context, output, output_size); +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + const TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + optimized_ops::Ceil(GetTensorShape(input), GetTensorData(input), + GetTensorShape(output), GetTensorData(output)); + + return kTfLiteOk; +} +} // namespace ceil + +TfLiteRegistration* Register_CEIL() { + static TfLiteRegistration r = {/*init=*/nullptr, + /*free=*/nullptr, ceil::Prepare, ceil::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/lite/kernels/ceil_test.cc b/tensorflow/lite/kernels/ceil_test.cc new file mode 100644 index 0000000000..e120105082 --- /dev/null +++ b/tensorflow/lite/kernels/ceil_test.cc @@ -0,0 +1,83 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include "tensorflow/lite/interpreter.h" +#include "tensorflow/lite/kernels/register.h" +#include "tensorflow/lite/kernels/test_util.h" +#include "tensorflow/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class CeilOpModel : public SingleOpModel { + public: + CeilOpModel(std::initializer_list input_shape, TensorType input_type) { + input_ = AddInput(TensorType_FLOAT32); + output_ = AddOutput(TensorType_FLOAT32); + SetBuiltinOp(BuiltinOperator_CEIL, BuiltinOptions_NONE, 0); + BuildInterpreter({ + input_shape, + }); + } + + int input() { return input_; } + + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + private: + int input_; + int output_; +}; + +TEST(CeilOpTest, SingleDim) { + CeilOpModel model({2}, TensorType_FLOAT32); + model.PopulateTensor(model.input(), {8.5, 0.0}); + model.Invoke(); + EXPECT_THAT(model.GetOutput(), ElementsAreArray({9, 0})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2})); +} + +TEST(CeilOpTest, MultiDims) { + CeilOpModel model({2, 1, 1, 5}, TensorType_FLOAT32); + model.PopulateTensor(model.input(), { + 0.0001, + 8.0001, + 0.9999, + 9.9999, + 0.5, + -0.0001, + -8.0001, + -0.9999, + -9.9999, + -0.5, + }); + model.Invoke(); + EXPECT_THAT(model.GetOutput(), + ElementsAreArray({1, 9, 1, 10, 1, 0, -8, 0, -9, 0})); + EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 5})); +} + +} // namespace +} // namespace 
tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h index 5485d907c2..c24b7faa21 100644 --- a/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h @@ -1740,6 +1740,18 @@ inline void Floor(const float* input_data, const Dims<4>& input_dims, output_data); } +inline void Ceil(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + Ceil(DimsToShape(input_dims), input_data, DimsToShape(output_dims), + output_data); +} + +inline void Abs(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + Abs(DimsToShape(input_dims), input_data, DimsToShape(output_dims), + output_data); +} + inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims, const int32* output_size_data, const Dims<4>& output_size_dims, float* output_data, diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index c79b69a22e..7f7296ae97 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -4898,6 +4898,22 @@ inline void Floor(const RuntimeShape& input_shape, const float* input_data, output_map.array() = Eigen::floor(input_map.array()); } +inline void Ceil(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + gemmlowp::ScopedProfilingLabel label("Ceil"); + auto input_map = MapAsVector(input_data, input_shape); + auto output_map = MapAsVector(output_data, output_shape); + output_map.array() = Eigen::ceil(input_map.array()); +} + +inline void 
Abs(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + gemmlowp::ScopedProfilingLabel label("Abs"); + auto input_map = MapAsVector(input_data, input_shape); + auto output_map = MapAsVector(output_data, output_shape); + output_map.array() = Eigen::abs(input_map.array()); +} + #ifdef USE_NEON inline void ResizeBilinearKernel(const float* input_ptr, int32 depth, float scale, float* output_ptr) { diff --git a/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h index 380fc8f98e..30bb92c8b3 100644 --- a/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h @@ -1883,6 +1883,18 @@ inline void Floor(const float* input_data, const Dims<4>& input_dims, output_data); } +inline void Ceil(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + Ceil(DimsToShape(input_dims), input_data, DimsToShape(output_dims), + output_data); +} + +inline void Abs(const float* input_data, const Dims<4>& input_dims, + float* output_data, const Dims<4>& output_dims) { + Abs(DimsToShape(input_dims), input_data, DimsToShape(output_dims), + output_data); +} + template inline void ResizeBilinear(const T* input_data, const Dims<4>& input_dims, const int32* output_size_data, diff --git a/tensorflow/lite/kernels/internal/reference/reference_ops.h b/tensorflow/lite/kernels/internal/reference/reference_ops.h index ea3ab06da1..133a455e9e 100644 --- a/tensorflow/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/reference_ops.h @@ -3040,6 +3040,26 @@ inline void Floor(const RuntimeShape& input_shape, const float* input_data, } } +inline void Ceil(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int flat_size = 
MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) { + int offset = i; + output_data[offset] = std::ceil(input_data[offset]); + } +} + +inline void Abs(const RuntimeShape& input_shape, const float* input_data, + const RuntimeShape& output_shape, float* output_data) { + const int flat_size = MatchingFlatSize(input_shape, output_shape); + + for (int i = 0; i < flat_size; i++) { + int offset = i; + output_data[offset] = std::fabs(input_data[offset]); + } +} + template inline void Gather(const tflite::GatherParams& op_params, const RuntimeShape& input_shape, const T* input_data, diff --git a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index c0e6f6994f..caf61e9003 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -94,6 +94,8 @@ TfLiteRegistration* Register_GREATER_EQUAL(); TfLiteRegistration* Register_LESS(); TfLiteRegistration* Register_LESS_EQUAL(); TfLiteRegistration* Register_FLOOR(); +TfLiteRegistration* Register_CEIL(); +TfLiteRegistration* Register_ABS(); TfLiteRegistration* Register_TILE(); TfLiteRegistration* Register_NEG(); TfLiteRegistration* Register_SUM(); @@ -235,6 +237,8 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_LESS, Register_LESS()); AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL()); AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR()); + AddBuiltin(BuiltinOperator_CEIL, Register_CEIL()); + AddBuiltin(BuiltinOperator_ABS, Register_ABS()); AddBuiltin(BuiltinOperator_NEG, Register_NEG()); AddBuiltin(BuiltinOperator_SELECT, Register_SELECT()); AddBuiltin(BuiltinOperator_SLICE, Register_SLICE()); diff --git a/tensorflow/lite/nnapi/NeuralNetworksShim.h b/tensorflow/lite/nnapi/NeuralNetworksShim.h index c39502f4ac..037d364c3b 100644 --- a/tensorflow/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/lite/nnapi/NeuralNetworksShim.h @@ -143,6 +143,8 @@ enum { ANEURALNETWORKS_STRIDED_SLICE = 35, ANEURALNETWORKS_SUB = 
36, ANEURALNETWORKS_TRANSPOSE = 37, + ANEURALNETWORKS_CEIL = 38, + ANEURALNETWORKS_ABS = 39, }; /** diff --git a/tensorflow/lite/nnapi_delegate.cc b/tensorflow/lite/nnapi_delegate.cc index 26d75696a1..6da803ce7b 100644 --- a/tensorflow/lite/nnapi_delegate.cc +++ b/tensorflow/lite/nnapi_delegate.cc @@ -489,6 +489,12 @@ TfLiteStatus AddOpsAndParams( case tflite::BuiltinOperator_FLOOR: nn_op_type = ANEURALNETWORKS_FLOOR; break; + case tflite::BuiltinOperator_CEIL: + nn_op_type = ANEURALNETWORKS_CEIL; + break; + case tflite::BuiltinOperator_ABS: + nn_op_type = ANEURALNETWORKS_ABS; + break; case tflite::BuiltinOperator_LOGISTIC: nn_op_type = ANEURALNETWORKS_LOGISTIC; break; diff --git a/tensorflow/lite/schema/schema.fbs b/tensorflow/lite/schema/schema.fbs index 980f13b19b..2d27722415 100644 --- a/tensorflow/lite/schema/schema.fbs +++ b/tensorflow/lite/schema/schema.fbs @@ -205,6 +205,7 @@ enum BuiltinOperator : byte { MIRROR_PAD = 100, ABS = 101, SPLIT_V = 102, + CEIL = 103, } // Options for the builtin operators. 
diff --git a/tensorflow/lite/schema/schema_generated.h b/tensorflow/lite/schema/schema_generated.h index 637cbafabd..a605c19b52 100755 --- a/tensorflow/lite/schema/schema_generated.h +++ b/tensorflow/lite/schema/schema_generated.h @@ -520,11 +520,12 @@ enum BuiltinOperator { BuiltinOperator_MIRROR_PAD = 100, BuiltinOperator_ABS = 101, BuiltinOperator_SPLIT_V = 102, + BuiltinOperator_CEIL = 103, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_SPLIT_V + BuiltinOperator_MAX = BuiltinOperator_CEIL }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[102] { +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[103] { static const BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -627,7 +628,9 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[102] { BuiltinOperator_SQUARED_DIFFERENCE, BuiltinOperator_MIRROR_PAD, BuiltinOperator_ABS, - BuiltinOperator_SPLIT_V + BuiltinOperator_SPLIT_V, + BuiltinOperator_CEIL + }; return values; } @@ -737,6 +740,7 @@ inline const char * const *EnumNamesBuiltinOperator() { "MIRROR_PAD", "ABS", "SPLIT_V", + "CEIL", nullptr }; return names; diff --git a/tensorflow/lite/testing/generate_examples.py b/tensorflow/lite/testing/generate_examples.py index dd7b3d0745..825873a884 100644 --- a/tensorflow/lite/testing/generate_examples.py +++ b/tensorflow/lite/testing/generate_examples.py @@ -3021,6 +3021,58 @@ def make_floor_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +def make_ceil_tests(zip_path): + """Make a set of tests to do ceil.""" + + test_parameters = [{ + "input_dtype": [tf.float32], + "input_shape": [[1], [1, 2], [5, 6, 7, 8], [3, 4, 5, 6]], + }] + + def build_graph(parameters): + """Build the ceil op testing graph.""" + input_value = tf.placeholder( + dtype=parameters["input_dtype"], + name="input1", + shape=parameters["input_shape"]) + out = tf.ceil(input_value) + return [input_value], [out] + 
+ def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["input_dtype"], + parameters["input_shape"]) + return [input_value], sess.run( + outputs, feed_dict={inputs[0]: input_value}) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + +def make_abs_tests(zip_path): + """Make a set of tests to do abs.""" + + test_parameters = [{ + "input_dtype": [tf.float32], + "input_shape": [[1], [1, 2], [5, 6, 7, 8], [3, 4, 5, 6]], + }] + + def build_graph(parameters): + """Build the abs op testing graph.""" + input_value = tf.placeholder( + dtype=parameters["input_dtype"], + name="input1", + shape=parameters["input_shape"]) + out = tf.abs(input_value) + return [input_value], [out] + + def build_inputs(parameters, sess, inputs, outputs): + input_value = create_tensor_data(parameters["input_dtype"], + parameters["input_shape"]) + return [input_value], sess.run( + outputs, feed_dict={inputs[0]: input_value}) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + def make_neg_tests(zip_path): """Make a set of tests to do neg.""" diff --git a/tensorflow/lite/toco/export_tensorflow.cc b/tensorflow/lite/toco/export_tensorflow.cc index 9fff001552..9dfeaad164 100644 --- a/tensorflow/lite/toco/export_tensorflow.cc +++ b/tensorflow/lite/toco/export_tensorflow.cc @@ -1205,6 +1205,26 @@ void ConvertFloorOperator(const Model& model, const FloorOperator& src_op, (*floor_op->mutable_attr())["T"].set_type(DT_FLOAT); } +void ConvertCeilOperator(const Model& model, const CeilOperator& src_op, + GraphDef* tensorflow_graph) { + tensorflow::NodeDef* ceil_op = tensorflow_graph->add_node(); + ceil_op->set_op("Ceil"); + ceil_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 1); + *ceil_op->add_input() = src_op.inputs[0]; + (*ceil_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + +void ConvertAbsOperator(const Model& model, const AbsOperator& src_op, + GraphDef* tensorflow_graph) { + 
tensorflow::NodeDef* abs_op = tensorflow_graph->add_node(); + abs_op->set_op("Abs"); + abs_op->set_name(src_op.outputs[0]); + CHECK_EQ(src_op.inputs.size(), 1); + *abs_op->add_input() = src_op.inputs[0]; + (*abs_op->mutable_attr())["T"].set_type(DT_FLOAT); +} + void ConvertGatherOperator(const Model& model, const GatherOperator& src_op, GraphDef* tensorflow_graph) { tensorflow::NodeDef* gather_op = tensorflow_graph->add_node(); @@ -2169,6 +2189,12 @@ void ConvertOperator(const Model& model, const Operator& src_op, } else if (src_op.type == OperatorType::kFloor) { ConvertFloorOperator(model, static_cast(src_op), tensorflow_graph); + } else if (src_op.type == OperatorType::kCeil) { + ConvertCeilOperator(model, static_cast(src_op), + tensorflow_graph); + } else if (src_op.type == OperatorType::kAbs) { + ConvertAbsOperator(model, static_cast(src_op), + tensorflow_graph); } else if (src_op.type == OperatorType::kGather) { ConvertGatherOperator(model, static_cast(src_op), tensorflow_graph); diff --git a/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 0e653f08a0..c6c7681c03 100644 --- a/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -1869,6 +1869,8 @@ void ProcessMirrorPadOperator(Model* model, MirrorPadOperator* op) { case OperatorType::kAssert: case OperatorType::kCast: case OperatorType::kFloor: + case OperatorType::kCeil: + case OperatorType::kAbs: case OperatorType::kExp: case OperatorType::kSin: case OperatorType::kLogicalAnd: diff --git a/tensorflow/lite/toco/graph_transformations/reorder_elementwise_unary.cc b/tensorflow/lite/toco/graph_transformations/reorder_elementwise_unary.cc index 6a4b919854..90e64816c8 100644 --- a/tensorflow/lite/toco/graph_transformations/reorder_elementwise_unary.cc +++ b/tensorflow/lite/toco/graph_transformations/reorder_elementwise_unary.cc @@ 
-29,7 +29,9 @@ namespace { bool IsElementwiseOperator(OperatorType optype) { switch (optype) { + case OperatorType::kAbs: case OperatorType::kCast: + case OperatorType::kCeil: case OperatorType::kExp: case OperatorType::kFloor: case OperatorType::kNeg: diff --git a/tensorflow/lite/toco/import_tensorflow.cc b/tensorflow/lite/toco/import_tensorflow.cc index 0b2f810394..de8ae19a71 100644 --- a/tensorflow/lite/toco/import_tensorflow.cc +++ b/tensorflow/lite/toco/import_tensorflow.cc @@ -1491,6 +1491,34 @@ tensorflow::Status ConvertFloorOperator( return tensorflow::Status::OK(); } +tensorflow::Status ConvertCeilOperator( + const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, + Model* model) { + CHECK_EQ(node.op(), "Ceil"); + TF_QCHECK_OK(CheckInputsCount(node, tf_import_flags, 1)); + const auto data_type = GetDataTypeAttr(node, "T"); + CHECK(data_type == DT_FLOAT); + auto* op = new CeilOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertAbsOperator( + const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, + Model* model) { + CHECK_EQ(node.op(), "Abs"); + TF_QCHECK_OK(CheckInputsCount(node, tf_import_flags, 1)); + const auto data_type = GetDataTypeAttr(node, "T"); + CHECK(data_type == DT_FLOAT); + auto* op = new AbsOperator; + op->inputs.push_back(node.input(0)); + op->outputs.push_back(node.name()); + model->operators.emplace_back(op); + return tensorflow::Status::OK(); +} + tensorflow::Status ConvertGatherOperator( const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -2314,6 +2342,7 @@ ConverterMapType GetTensorFlowNodeConverterMap() { {"BatchToSpaceND", ConvertBatchToSpaceNDOperator}, {"BiasAdd", ConvertBiasAddOperator}, {"Cast", ConvertCastOperator}, + {"Ceil", ConvertCeilOperator}, {"CheckNumerics", ConvertIdentityOperator}, {"Concat", ConvertConcatOperator}, {"ConcatV2", 
ConvertConcatOperator}, diff --git a/tensorflow/lite/toco/model.h b/tensorflow/lite/toco/model.h index d392535f5c..69c6d7608a 100644 --- a/tensorflow/lite/toco/model.h +++ b/tensorflow/lite/toco/model.h @@ -37,11 +37,13 @@ using tflite::QuantizationParams; enum class OperatorType : uint8 { kNone, // General-purpose neural network operators. + kAbs, kAdd, kAddN, kAveragePool, kBatchMatMul, kBatchNormalization, + kCeil, kConv, kConcatenation, kDepthwiseConv, @@ -1658,6 +1660,26 @@ struct FloorOperator : Operator { FloorOperator() : Operator(OperatorType::kFloor) {} }; +// Ceil operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Ceil +struct CeilOperator : Operator { + CeilOperator() : Operator(OperatorType::kCeil) {} +}; + +// Abs operator. +// +// Inputs: +// inputs[0]: required: the input array +// +// TensorFlow equivalent: Abs +struct AbsOperator : Operator { + AbsOperator() : Operator(OperatorType::kAbs) {} +}; + // Gather operator. It gathers slices from params according to indices. // Only 1-D indices are supported at the moment. 
// diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index 205af23da5..2122d04958 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -1649,6 +1649,10 @@ std::vector> BuildOperatorList( "DEQUANTIZE", OperatorType::kDequantize)); ops.push_back( MakeUnique>("FLOOR", OperatorType::kFloor)); + ops.push_back( + MakeUnique>("CEIL", OperatorType::kCeil)); + ops.push_back( + MakeUnique>("ABS", OperatorType::kAbs)); ops.push_back( MakeUnique>("RELU", OperatorType::kRelu)); ops.push_back(MakeUnique>( diff --git a/tensorflow/lite/toco/tflite/operator_test.cc b/tensorflow/lite/toco/tflite/operator_test.cc index 14ec89cd73..e284619903 100644 --- a/tensorflow/lite/toco/tflite/operator_test.cc +++ b/tensorflow/lite/toco/tflite/operator_test.cc @@ -114,6 +114,8 @@ TEST_F(OperatorTest, SimpleOperators) { CheckSimpleOperator("DEQUANTIZE", OperatorType::kDequantize); CheckSimpleOperator("FLOOR", OperatorType::kFloor); + CheckSimpleOperator("CEIL", OperatorType::kCeil); + CheckSimpleOperator("ABS", OperatorType::kAbs); CheckSimpleOperator("RELU", OperatorType::kRelu); CheckSimpleOperator("RELU_N1_TO_1", OperatorType::kRelu1); CheckSimpleOperator("RELU6", OperatorType::kRelu6); diff --git a/tensorflow/lite/toco/tflite/whitelisted_flex_ops.cc b/tensorflow/lite/toco/tflite/whitelisted_flex_ops.cc index 039a918af1..a96222a8a2 100644 --- a/tensorflow/lite/toco/tflite/whitelisted_flex_ops.cc +++ b/tensorflow/lite/toco/tflite/whitelisted_flex_ops.cc @@ -68,6 +68,7 @@ bool IsWhitelistedFlexOp(const std::string& tensorflow_op_name) { "BroadcastArgs", "BroadcastGradientArgs", "Cast", + "Ceil", "CheckNumerics", "ComplexAbs", "Concat", diff --git a/tensorflow/lite/toco/tooling_util.cc b/tensorflow/lite/toco/tooling_util.cc index af4cd386a2..0887dca9b5 100644 --- a/tensorflow/lite/toco/tooling_util.cc +++ b/tensorflow/lite/toco/tooling_util.cc @@ -385,6 +385,8 @@ const char* OperatorTypeName(OperatorType 
type) { HANDLE_OPERATORTYPENAME_CASE(ConcatV2) HANDLE_OPERATORTYPENAME_CASE(Cast) HANDLE_OPERATORTYPENAME_CASE(Floor) + HANDLE_OPERATORTYPENAME_CASE(Ceil) + HANDLE_OPERATORTYPENAME_CASE(Abs) HANDLE_OPERATORTYPENAME_CASE(Gather) HANDLE_OPERATORTYPENAME_CASE(ResizeBilinear) HANDLE_OPERATORTYPENAME_CASE(SpaceToBatchND) -- GitLab From 9e91a0899f116e8bfec46922221c45765346ce2e Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Wed, 12 Dec 2018 15:27:09 +0100 Subject: [PATCH 0026/1095] Update tensorflow.data.experimental goldens for v1 Add the drop_remainder argument for bucket_by_sequence_length to the goldens. --- .../tools/api/golden/v1/tensorflow.data.experimental.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt index ad10b82283..b8ba3e341f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.experimental.pbtxt @@ -54,7 +54,7 @@ tf_module { } member_method { name: "bucket_by_sequence_length" - argspec: "args=[\'element_length_func\', \'bucket_boundaries\', \'bucket_batch_sizes\', \'padded_shapes\', \'padding_values\', \'pad_to_bucket_boundary\', \'no_padding\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'element_length_func\', \'bucket_boundaries\', \'bucket_batch_sizes\', \'padded_shapes\', \'padding_values\', \'pad_to_bucket_boundary\', \'no_padding\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\', \'False\'], " } member_method { name: "choose_from_datasets" -- GitLab From c266087d4d086913194bd621cad125853eca8fd6 Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Wed, 12 Dec 2018 15:29:25 +0100 Subject: [PATCH 0027/1095] Update tensorflow.data.experimental goldens for v2 Add the drop_remainder argument for 
bucket_by_sequence_length to the goldens. --- .../tools/api/golden/v2/tensorflow.data.experimental.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt index ad10b82283..b8ba3e341f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.experimental.pbtxt @@ -54,7 +54,7 @@ tf_module { } member_method { name: "bucket_by_sequence_length" - argspec: "args=[\'element_length_func\', \'bucket_boundaries\', \'bucket_batch_sizes\', \'padded_shapes\', \'padding_values\', \'pad_to_bucket_boundary\', \'no_padding\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\'], " + argspec: "args=[\'element_length_func\', \'bucket_boundaries\', \'bucket_batch_sizes\', \'padded_shapes\', \'padding_values\', \'pad_to_bucket_boundary\', \'no_padding\', \'drop_remainder\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'False\', \'False\', \'False\'], " } member_method { name: "choose_from_datasets" -- GitLab From e30a0a1de0b0b51da69b3d4bf5c545142accb3c2 Mon Sep 17 00:00:00 2001 From: Siju Samuel Date: Wed, 12 Dec 2018 20:03:36 +0530 Subject: [PATCH 0028/1095] Removed abs since already committed in 5916a9f0e4b5b2c4f80767ff83a001a6f86b4395 --- .../lite/core/api/flatbuffer_conversions.cc | 1 - .../lite/delegates/nnapi/nnapi_delegate.cc | 10 -- .../delegates/nnapi/nnapi_delegate_test.cc | 60 ------------ .../writer/option_writer_generator.cc | 1 - tensorflow/lite/g3doc/tf_ops_compatibility.md | 11 --- tensorflow/lite/kernels/BUILD | 17 ---- tensorflow/lite/kernels/abs.cc | 59 ------------ tensorflow/lite/kernels/abs_test.cc | 94 ------------------- .../internal/optimized/legacy_optimized_ops.h | 6 -- .../internal/optimized/optimized_ops.h | 8 -- .../internal/reference/legacy_reference_ops.h | 6 -- 
.../internal/reference/reference_ops.h | 10 -- tensorflow/lite/kernels/register.cc | 2 - tensorflow/lite/nnapi/NeuralNetworksShim.h | 1 - tensorflow/lite/nnapi_delegate.cc | 3 - tensorflow/lite/testing/generate_examples.py | 26 ----- tensorflow/lite/toco/export_tensorflow.cc | 13 --- .../propagate_fixed_sizes.cc | 1 - .../reorder_elementwise_unary.cc | 1 - tensorflow/lite/toco/import_tensorflow.cc | 14 --- tensorflow/lite/toco/model.h | 11 --- tensorflow/lite/toco/tflite/operator.cc | 2 - tensorflow/lite/toco/tflite/operator_test.cc | 1 - tensorflow/lite/toco/tooling_util.cc | 1 - 24 files changed, 359 deletions(-) delete mode 100644 tensorflow/lite/kernels/abs.cc delete mode 100644 tensorflow/lite/kernels/abs_test.cc diff --git a/tensorflow/lite/core/api/flatbuffer_conversions.cc b/tensorflow/lite/core/api/flatbuffer_conversions.cc index 8a436c440f..9c8eb5a2d8 100644 --- a/tensorflow/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/lite/core/api/flatbuffer_conversions.cc @@ -664,7 +664,6 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_EQUAL: case BuiltinOperator_EXP: case BuiltinOperator_EXPAND_DIMS: - case BuiltinOperator_ABS: case BuiltinOperator_CEIL: case BuiltinOperator_FLOOR: case BuiltinOperator_GREATER: diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index 1a95ac1891..cac98ae3da 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -648,16 +648,6 @@ class NNAPIDelegateKernel { return nullptr; } break; - case kTfLiteBuiltinAbs: - if (version == 1) { - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - return ANEURALNETWORKS_ABS; - }; - } else { - return nullptr; - } - break; case kTfLiteBuiltinRelu: if (version == 1) { return [](const NNAPIOpMappingArgs& mapping_args) diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc 
b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc index 3050b69028..f6a04e36cd 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc @@ -1073,66 +1073,6 @@ TEST(NNAPIDelegate, CeilMultiDims) { EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 5})); } -class AbsOpModel : public SingleOpModelWithNNAPI { - public: - AbsOpModel(std::initializer_list input_shape, TensorType input_type) { - input_ = AddInput(TensorType_FLOAT32); - output_ = AddOutput(TensorType_FLOAT32); - SetBuiltinOp(BuiltinOperator_ABS, BuiltinOptions_NONE, 0); - BuildInterpreter({ - input_shape, - }); - } - - int input() { return input_; } - - std::vector GetOutput() { return ExtractVector(output_); } - std::vector GetOutputShape() { return GetTensorShape(output_); } - - private: - int input_; - int output_; -}; - -TEST(NNAPIDelegate, AbsSingleDim) { - AbsOpModel model({2}, TensorType_FLOAT32); - model.PopulateTensor(model.input(), {8.5, -2.0}); - model.Invoke(); - EXPECT_THAT(model.GetOutput(), ElementsAreArray({8.5, 2.0})); - EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2})); -} - -TEST(NNAPIDelegate, AbsMultiDims) { - AbsOpModel model({2, 1, 1, 5}, TensorType_FLOAT32); - model.PopulateTensor(model.input(), { - 0.0001, - 8.0001, - 0.9999, - 9.9999, - 0.5, - -0.0001, - -8.0001, - -0.9999, - -9.9999, - -0.5, - }); - model.Invoke(); - EXPECT_THAT(model.GetOutput(), - ElementsAreArray({ - 0.0001, - 8.0001, - 0.9999, - 9.9999, - 0.5, - 0.0001, - 8.0001, - 0.9999, - 9.9999, - 0.5, - })); - EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 5})); -} - class LocalResponseNormOpModel : public SingleOpModelWithNNAPI { public: LocalResponseNormOpModel(std::initializer_list input_shape, int radius, diff --git a/tensorflow/lite/experimental/writer/option_writer_generator.cc b/tensorflow/lite/experimental/writer/option_writer_generator.cc index d79b6e8ca9..ba0303f9db 100644 --- 
a/tensorflow/lite/experimental/writer/option_writer_generator.cc +++ b/tensorflow/lite/experimental/writer/option_writer_generator.cc @@ -161,7 +161,6 @@ class OpOptionData { ""; // TODO(aselle): maybe something else. op_to_option_["FLOOR"] = ""; op_to_option_["CEIL"] = ""; - op_to_option_["ABS"] = ""; op_to_option_["HASHTABLE_LOOKUP"] = ""; // TODO(aselle): maybe something else. op_to_option_["LOGISTIC"] = ""; diff --git a/tensorflow/lite/g3doc/tf_ops_compatibility.md b/tensorflow/lite/g3doc/tf_ops_compatibility.md index dc6ba8a463..d20320a3b5 100644 --- a/tensorflow/lite/g3doc/tf_ops_compatibility.md +++ b/tensorflow/lite/g3doc/tf_ops_compatibility.md @@ -373,17 +373,6 @@ outputs: { } ``` -**ABS** - -``` -inputs { - 0: tensor -} -outputs: { - 0: result of computing element-wise absolute value of the input tensor -} -``` - **FULLY_CONNECTED** ``` diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index f66f69870f..71d06ba4d7 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -158,7 +158,6 @@ cc_library( cc_library( name = "builtin_op_kernels", srcs = [ - "abs.cc", "activations.cc", "add.cc", "arg_min_max.cc", @@ -601,22 +600,6 @@ tf_cc_test( ], ) -tf_cc_test( - name = "abs_test", - size = "small", - srcs = ["abs_test.cc"], - tags = [ - "no_oss", - "tflite_not_portable_ios", - ], - deps = [ - ":builtin_ops", - "//tensorflow/lite:framework", - "//tensorflow/lite/kernels:test_util", - "@com_google_googletest//:gtest", - ], -) - tf_cc_test( name = "ceil_test", size = "small", diff --git a/tensorflow/lite/kernels/abs.cc b/tensorflow/lite/kernels/abs.cc deleted file mode 100644 index b44f9de630..0000000000 --- a/tensorflow/lite/kernels/abs.cc +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/c/c_api_internal.h" -#include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h" -#include "tensorflow/lite/kernels/internal/tensor.h" -#include "tensorflow/lite/kernels/kernel_util.h" - -namespace tflite { -namespace ops { -namespace builtin { -namespace abs { - -constexpr int kInputTensor = 0; -constexpr int kOutputTensor = 0; - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32); - output->type = input->type; - TfLiteIntArray* output_size = TfLiteIntArrayCopy(input->dims); - return context->ResizeTensor(context, output, output_size); -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - - optimized_ops::Abs(GetTensorShape(input), GetTensorData(input), - GetTensorShape(output), GetTensorData(output)); - - return kTfLiteOk; -} -} // namespace abs - -TfLiteRegistration* Register_ABS() { - static TfLiteRegistration r = {/*init=*/nullptr, - /*free=*/nullptr, abs::Prepare, abs::Eval}; - return &r; -} - -} // namespace builtin -} // namespace ops -} // namespace tflite diff --git 
a/tensorflow/lite/kernels/abs_test.cc b/tensorflow/lite/kernels/abs_test.cc deleted file mode 100644 index 5e3f95c43a..0000000000 --- a/tensorflow/lite/kernels/abs_test.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include "tensorflow/lite/interpreter.h" -#include "tensorflow/lite/kernels/register.h" -#include "tensorflow/lite/kernels/test_util.h" -#include "tensorflow/lite/model.h" - -namespace tflite { -namespace { - -using ::testing::ElementsAreArray; - -class AbsOpModel : public SingleOpModel { - public: - AbsOpModel(std::initializer_list input_shape, TensorType input_type) { - input_ = AddInput(TensorType_FLOAT32); - output_ = AddOutput(TensorType_FLOAT32); - SetBuiltinOp(BuiltinOperator_ABS, BuiltinOptions_NONE, 0); - BuildInterpreter({ - input_shape, - }); - } - - int input() { return input_; } - - std::vector GetOutput() { return ExtractVector(output_); } - std::vector GetOutputShape() { return GetTensorShape(output_); } - - private: - int input_; - int output_; -}; - -TEST(AbsOpTest, SingleDim) { - AbsOpModel model({2}, TensorType_FLOAT32); - model.PopulateTensor(model.input(), {8.5, -2.0}); - model.Invoke(); - EXPECT_THAT(model.GetOutput(), ElementsAreArray({8.5, 2.0})); - EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2})); -} - -TEST(AbsOpTest, MultiDims) { - AbsOpModel 
model({2, 1, 1, 5}, TensorType_FLOAT32); - model.PopulateTensor(model.input(), { - 0.0001, - 8.0001, - 0.9999, - 9.9999, - 0.5, - -0.0001, - -8.0001, - -0.9999, - -9.9999, - -0.5, - }); - model.Invoke(); - EXPECT_THAT(model.GetOutput(), - ElementsAreArray({ - 0.0001, - 8.0001, - 0.9999, - 9.9999, - 0.5, - 0.0001, - 8.0001, - 0.9999, - 9.9999, - 0.5, - })); - EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 5})); -} - -} // namespace -} // namespace tflite - -int main(int argc, char** argv) { - ::tflite::LogToStderr(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h index c24b7faa21..a76649f934 100644 --- a/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h @@ -1746,12 +1746,6 @@ inline void Ceil(const float* input_data, const Dims<4>& input_dims, output_data); } -inline void Abs(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Abs(DimsToShape(input_dims), input_data, DimsToShape(output_dims), - output_data); -} - inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims, const int32* output_size_data, const Dims<4>& output_size_dims, float* output_data, diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index 7f7296ae97..0bd779bf3a 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -4906,14 +4906,6 @@ inline void Ceil(const RuntimeShape& input_shape, const float* input_data, output_map.array() = Eigen::ceil(input_map.array()); } -inline void Abs(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* 
output_data) { - gemmlowp::ScopedProfilingLabel label("Abs"); - auto input_map = MapAsVector(input_data, input_shape); - auto output_map = MapAsVector(output_data, output_shape); - output_map.array() = Eigen::abs(input_map.array()); -} - #ifdef USE_NEON inline void ResizeBilinearKernel(const float* input_ptr, int32 depth, float scale, float* output_ptr) { diff --git a/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h index 30bb92c8b3..431e2413e8 100644 --- a/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h @@ -1889,12 +1889,6 @@ inline void Ceil(const float* input_data, const Dims<4>& input_dims, output_data); } -inline void Abs(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Abs(DimsToShape(input_dims), input_data, DimsToShape(output_dims), - output_data); -} - template inline void ResizeBilinear(const T* input_data, const Dims<4>& input_dims, const int32* output_size_data, diff --git a/tensorflow/lite/kernels/internal/reference/reference_ops.h b/tensorflow/lite/kernels/internal/reference/reference_ops.h index 133a455e9e..6b8d817ca1 100644 --- a/tensorflow/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/reference_ops.h @@ -3050,16 +3050,6 @@ inline void Ceil(const RuntimeShape& input_shape, const float* input_data, } } -inline void Abs(const RuntimeShape& input_shape, const float* input_data, - const RuntimeShape& output_shape, float* output_data) { - const int flat_size = MatchingFlatSize(input_shape, output_shape); - - for (int i = 0; i < flat_size; i++) { - int offset = i; - output_data[offset] = std::fabs(input_data[offset]); - } -} - template inline void Gather(const tflite::GatherParams& op_params, const RuntimeShape& input_shape, const T* input_data, diff --git 
a/tensorflow/lite/kernels/register.cc b/tensorflow/lite/kernels/register.cc index caf61e9003..47ae934c4b 100644 --- a/tensorflow/lite/kernels/register.cc +++ b/tensorflow/lite/kernels/register.cc @@ -95,7 +95,6 @@ TfLiteRegistration* Register_LESS(); TfLiteRegistration* Register_LESS_EQUAL(); TfLiteRegistration* Register_FLOOR(); TfLiteRegistration* Register_CEIL(); -TfLiteRegistration* Register_ABS(); TfLiteRegistration* Register_TILE(); TfLiteRegistration* Register_NEG(); TfLiteRegistration* Register_SUM(); @@ -238,7 +237,6 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_LESS_EQUAL, Register_LESS_EQUAL()); AddBuiltin(BuiltinOperator_FLOOR, Register_FLOOR()); AddBuiltin(BuiltinOperator_CEIL, Register_CEIL()); - AddBuiltin(BuiltinOperator_ABS, Register_ABS()); AddBuiltin(BuiltinOperator_NEG, Register_NEG()); AddBuiltin(BuiltinOperator_SELECT, Register_SELECT()); AddBuiltin(BuiltinOperator_SLICE, Register_SLICE()); diff --git a/tensorflow/lite/nnapi/NeuralNetworksShim.h b/tensorflow/lite/nnapi/NeuralNetworksShim.h index 037d364c3b..82c5840952 100644 --- a/tensorflow/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/lite/nnapi/NeuralNetworksShim.h @@ -144,7 +144,6 @@ enum { ANEURALNETWORKS_SUB = 36, ANEURALNETWORKS_TRANSPOSE = 37, ANEURALNETWORKS_CEIL = 38, - ANEURALNETWORKS_ABS = 39, }; /** diff --git a/tensorflow/lite/nnapi_delegate.cc b/tensorflow/lite/nnapi_delegate.cc index 6da803ce7b..dfbb4813ad 100644 --- a/tensorflow/lite/nnapi_delegate.cc +++ b/tensorflow/lite/nnapi_delegate.cc @@ -492,9 +492,6 @@ TfLiteStatus AddOpsAndParams( case tflite::BuiltinOperator_CEIL: nn_op_type = ANEURALNETWORKS_CEIL; break; - case tflite::BuiltinOperator_ABS: - nn_op_type = ANEURALNETWORKS_ABS; - break; case tflite::BuiltinOperator_LOGISTIC: nn_op_type = ANEURALNETWORKS_LOGISTIC; break; diff --git a/tensorflow/lite/testing/generate_examples.py b/tensorflow/lite/testing/generate_examples.py index 825873a884..1dea01d59b 100644 --- 
a/tensorflow/lite/testing/generate_examples.py +++ b/tensorflow/lite/testing/generate_examples.py @@ -3047,32 +3047,6 @@ def make_ceil_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) -def make_abs_tests(zip_path): - """Make a set of tests to do abs.""" - - test_parameters = [{ - "input_dtype": [tf.float32], - "input_shape": [[1], [1, 2], [5, 6, 7, 8], [3, 4, 5, 6]], - }] - - def build_graph(parameters): - """Build the abs op testing graph.""" - input_value = tf.placeholder( - dtype=parameters["input_dtype"], - name="input1", - shape=parameters["input_shape"]) - out = tf.abs(input_value) - return [input_value], [out] - - def build_inputs(parameters, sess, inputs, outputs): - input_value = create_tensor_data(parameters["input_dtype"], - parameters["input_shape"]) - return [input_value], sess.run( - outputs, feed_dict={inputs[0]: input_value}) - - make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) - - def make_neg_tests(zip_path): """Make a set of tests to do neg.""" diff --git a/tensorflow/lite/toco/export_tensorflow.cc b/tensorflow/lite/toco/export_tensorflow.cc index 9dfeaad164..816cb9bc28 100644 --- a/tensorflow/lite/toco/export_tensorflow.cc +++ b/tensorflow/lite/toco/export_tensorflow.cc @@ -1215,16 +1215,6 @@ void ConvertCeilOperator(const Model& model, const CeilOperator& src_op, (*ceil_op->mutable_attr())["T"].set_type(DT_FLOAT); } -void ConvertAbsOperator(const Model& model, const AbsOperator& src_op, - GraphDef* tensorflow_graph) { - tensorflow::NodeDef* abs_op = tensorflow_graph->add_node(); - abs_op->set_op("Abs"); - abs_op->set_name(src_op.outputs[0]); - CHECK_EQ(src_op.inputs.size(), 1); - *abs_op->add_input() = src_op.inputs[0]; - (*abs_op->mutable_attr())["T"].set_type(DT_FLOAT); -} - void ConvertGatherOperator(const Model& model, const GatherOperator& src_op, GraphDef* tensorflow_graph) { tensorflow::NodeDef* gather_op = tensorflow_graph->add_node(); @@ -2192,9 +2182,6 @@ void 
ConvertOperator(const Model& model, const Operator& src_op, } else if (src_op.type == OperatorType::kCeil) { ConvertCeilOperator(model, static_cast(src_op), tensorflow_graph); - } else if (src_op.type == OperatorType::kAbs) { - ConvertAbsOperator(model, static_cast(src_op), - tensorflow_graph); } else if (src_op.type == OperatorType::kGather) { ConvertGatherOperator(model, static_cast(src_op), tensorflow_graph); diff --git a/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc index c6c7681c03..0aae68c9c4 100644 --- a/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -1870,7 +1870,6 @@ void ProcessMirrorPadOperator(Model* model, MirrorPadOperator* op) { case OperatorType::kCast: case OperatorType::kFloor: case OperatorType::kCeil: - case OperatorType::kAbs: case OperatorType::kExp: case OperatorType::kSin: case OperatorType::kLogicalAnd: diff --git a/tensorflow/lite/toco/graph_transformations/reorder_elementwise_unary.cc b/tensorflow/lite/toco/graph_transformations/reorder_elementwise_unary.cc index 90e64816c8..98105d384e 100644 --- a/tensorflow/lite/toco/graph_transformations/reorder_elementwise_unary.cc +++ b/tensorflow/lite/toco/graph_transformations/reorder_elementwise_unary.cc @@ -29,7 +29,6 @@ namespace { bool IsElementwiseOperator(OperatorType optype) { switch (optype) { - case OperatorType::kAbs: case OperatorType::kCast: case OperatorType::kCeil: case OperatorType::kExp: diff --git a/tensorflow/lite/toco/import_tensorflow.cc b/tensorflow/lite/toco/import_tensorflow.cc index de8ae19a71..53defed6cb 100644 --- a/tensorflow/lite/toco/import_tensorflow.cc +++ b/tensorflow/lite/toco/import_tensorflow.cc @@ -1505,20 +1505,6 @@ tensorflow::Status ConvertCeilOperator( return tensorflow::Status::OK(); } -tensorflow::Status ConvertAbsOperator( - const NodeDef& node, const 
TensorFlowImportFlags& tf_import_flags, - Model* model) { - CHECK_EQ(node.op(), "Abs"); - TF_QCHECK_OK(CheckInputsCount(node, tf_import_flags, 1)); - const auto data_type = GetDataTypeAttr(node, "T"); - CHECK(data_type == DT_FLOAT); - auto* op = new AbsOperator; - op->inputs.push_back(node.input(0)); - op->outputs.push_back(node.name()); - model->operators.emplace_back(op); - return tensorflow::Status::OK(); -} - tensorflow::Status ConvertGatherOperator( const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { diff --git a/tensorflow/lite/toco/model.h b/tensorflow/lite/toco/model.h index 69c6d7608a..c5cb215114 100644 --- a/tensorflow/lite/toco/model.h +++ b/tensorflow/lite/toco/model.h @@ -37,7 +37,6 @@ using tflite::QuantizationParams; enum class OperatorType : uint8 { kNone, // General-purpose neural network operators. - kAbs, kAdd, kAddN, kAveragePool, @@ -1670,16 +1669,6 @@ struct CeilOperator : Operator { CeilOperator() : Operator(OperatorType::kCeil) {} }; -// Abs operator. -// -// Inputs: -// inputs[0]: required: the input array -// -// TensorFlow equivalent: Abs -struct AbsOperator : Operator { - AbsOperator() : Operator(OperatorType::kAbs) {} -}; - // Gather operator. It gathers slices from params according to indices. // Only 1-D indices are supported at the moment. 
// diff --git a/tensorflow/lite/toco/tflite/operator.cc b/tensorflow/lite/toco/tflite/operator.cc index 2122d04958..dd47808046 100644 --- a/tensorflow/lite/toco/tflite/operator.cc +++ b/tensorflow/lite/toco/tflite/operator.cc @@ -1651,8 +1651,6 @@ std::vector> BuildOperatorList( MakeUnique>("FLOOR", OperatorType::kFloor)); ops.push_back( MakeUnique>("CEIL", OperatorType::kCeil)); - ops.push_back( - MakeUnique>("ABS", OperatorType::kAbs)); ops.push_back( MakeUnique>("RELU", OperatorType::kRelu)); ops.push_back(MakeUnique>( diff --git a/tensorflow/lite/toco/tflite/operator_test.cc b/tensorflow/lite/toco/tflite/operator_test.cc index e284619903..ba713edcb6 100644 --- a/tensorflow/lite/toco/tflite/operator_test.cc +++ b/tensorflow/lite/toco/tflite/operator_test.cc @@ -115,7 +115,6 @@ TEST_F(OperatorTest, SimpleOperators) { OperatorType::kDequantize); CheckSimpleOperator("FLOOR", OperatorType::kFloor); CheckSimpleOperator("CEIL", OperatorType::kCeil); - CheckSimpleOperator("ABS", OperatorType::kAbs); CheckSimpleOperator("RELU", OperatorType::kRelu); CheckSimpleOperator("RELU_N1_TO_1", OperatorType::kRelu1); CheckSimpleOperator("RELU6", OperatorType::kRelu6); diff --git a/tensorflow/lite/toco/tooling_util.cc b/tensorflow/lite/toco/tooling_util.cc index 0887dca9b5..9d6f554c59 100644 --- a/tensorflow/lite/toco/tooling_util.cc +++ b/tensorflow/lite/toco/tooling_util.cc @@ -386,7 +386,6 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(Cast) HANDLE_OPERATORTYPENAME_CASE(Floor) HANDLE_OPERATORTYPENAME_CASE(Ceil) - HANDLE_OPERATORTYPENAME_CASE(Abs) HANDLE_OPERATORTYPENAME_CASE(Gather) HANDLE_OPERATORTYPENAME_CASE(ResizeBilinear) HANDLE_OPERATORTYPENAME_CASE(SpaceToBatchND) -- GitLab From 9d5ac5c511dca4a959014b6f9882309bc9bd703f Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Thu, 13 Dec 2018 22:40:14 +0100 Subject: [PATCH 0029/1095] Fix typos --- tensorflow/python/data/experimental/ops/grouping.py | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/tensorflow/python/data/experimental/ops/grouping.py b/tensorflow/python/data/experimental/ops/grouping.py index 71e4b3391f..2eef36d0c8 100644 --- a/tensorflow/python/data/experimental/ops/grouping.py +++ b/tensorflow/python/data/experimental/ops/grouping.py @@ -162,8 +162,8 @@ def bucket_by_sequence_length(element_length_func, no_padding: `bool`, indicates whether to pad the batch features (features need to be either of type `tf.SparseTensor` or of same shape). drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing - whether the last batch should be dropped in the case its has fewer than - batch_size` elements; the default behavior is not to drop the smaller + whether the last batch should be dropped in the case it has fewer than + `batch_size` elements; the default behavior is not to drop the smaller batch. Returns: -- GitLab From a06497bce4e91b267850b8b82ee4fe684c01e94b Mon Sep 17 00:00:00 2001 From: Steve Lang Date: Fri, 14 Dec 2018 09:01:27 +1100 Subject: [PATCH 0030/1095] .numpy() was missing in code example: tf.add(1, 2).numpy() --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 68d7e180d1..c512aeb06c 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ $ python ```python >>> import tensorflow as tf >>> tf.enable_eager_execution() ->>> tf.add(1, 2) +>>> tf.add(1, 2).numpy() 3 >>> hello = tf.constant('Hello, TensorFlow!') >>> hello.numpy() -- GitLab From c8c5f69e9a4d2424ba4cc603daebd4c24e5128d4 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 13 Dec 2018 14:45:48 -0800 Subject: [PATCH 0031/1095] [Go]: Fixup paths to protocol buffers. 
Without this: go generate github.com/tensorflow/tensorflow/tensorflow/go/op would fail with: ../genop/internal/api_def_map.go:34:2: cannot find package "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal/proto/tensorflow/core/framework_go_proto" in any of: /usr/local/go/src/github.com/tensorflow/tensorflow/tensorflow/go/genop/internal/proto/tensorflow/core/framework_go_proto (from $GOROOT) /go/src/github.com/tensorflow/tensorflow/tensorflow/go/genop/internal/proto/tensorflow/core/framework_go_proto (from $GOPATH) This breakage was probably introduced by https://github.com/tensorflow/tensorflow/pull/17262 --- tensorflow/go/genop/internal/api_def_map.go | 2 +- tensorflow/go/genop/internal/genop.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/go/genop/internal/api_def_map.go b/tensorflow/go/genop/internal/api_def_map.go index 8600452b47..0bbd88b61c 100644 --- a/tensorflow/go/genop/internal/api_def_map.go +++ b/tensorflow/go/genop/internal/api_def_map.go @@ -31,7 +31,7 @@ import ( "unsafe" "github.com/golang/protobuf/proto" - pb "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal/proto/tensorflow/core/framework_go_proto" + pb "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal/proto/github.com/tensorflow/tensorflow/tensorflow/go/core/framework" ) // Encapsulates a collection of API definitions. 
diff --git a/tensorflow/go/genop/internal/genop.go b/tensorflow/go/genop/internal/genop.go index fb81631218..1c05715a1a 100644 --- a/tensorflow/go/genop/internal/genop.go +++ b/tensorflow/go/genop/internal/genop.go @@ -47,7 +47,7 @@ import ( "unsafe" "github.com/golang/protobuf/proto" - pb "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal/proto/tensorflow/core/framework_go_proto" + pb "github.com/tensorflow/tensorflow/tensorflow/go/genop/internal/proto/github.com/tensorflow/tensorflow/tensorflow/go/core/framework" ) // GenerateFunctionsForRegisteredOps writes a Go source code file to w -- GitLab From c1fc5d5036d850be852ec473f32292ca973f5bed Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Thu, 13 Dec 2018 22:57:22 +0100 Subject: [PATCH 0032/1095] Rewrite grouping tests to be @parameterized tests --- .../data/experimental/kernel_tests/BUILD | 1 + .../bucket_by_sequence_length_test.py | 60 ++++++++++++------- 2 files changed, 38 insertions(+), 23 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD index d7ca5a70e4..897e949b0f 100644 --- a/tensorflow/python/data/experimental/kernel_tests/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/BUILD @@ -23,6 +23,7 @@ py_test( "//tensorflow/python/data/kernel_tests:test_base", "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", ], ) diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py index fab79619a0..bcb7ef9496 100644 --- a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py @@ -19,6 +19,8 @@ from __future__ import print_function import random +from absl.testing import parameterized + from 
tensorflow.python.data.experimental.ops import grouping from tensorflow.python.data.kernel_tests import test_base from tensorflow.python.data.ops import dataset_ops @@ -69,9 +71,13 @@ def _get_record_shape(sparse): return tensor_shape.TensorShape([None]) -class BucketBySequenceLengthTest(test_base.DatasetTestBase): +class BucketBySequenceLengthTest(test_base.DatasetTestBase, parameterized.TestCase): - def testBucketDropReminder(self): + @parameterized.named_parameters( + ("WithoutPadding", True), + ("WithPadding", False), + ) + def testBucketDropReminder(self, param_no_padding): boundaries = [10, 20, 30] batch_sizes = [10, 8, 4, 2] @@ -192,10 +198,13 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): .format(sorted(expected_lengths), sorted(generated_lengths))) - for no_padding in (True, False): - _test_bucket_by_padding(no_padding) + _test_bucket_by_padding(param_no_padding) - def testBucket(self): + @parameterized.named_parameters( + ("WithoutPadding", True), + ("WithPadding", False), + ) + def testBucket(self, param_no_padding): boundaries = [10, 20, 30] batch_sizes = [10, 8, 4, 2] @@ -251,8 +260,7 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val)) self.assertEqual(sorted(lengths), sorted(lengths_val)) - for no_padding in (True, False): - _test_bucket_by_padding(no_padding) + _test_bucket_by_padding(param_no_padding) def testPadToBoundary(self): @@ -336,7 +344,11 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): self.assertAllEqual(batches[4], [[1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]) - def testTupleElements(self): + @parameterized.named_parameters( + ("WithoutPadding", True), + ("WithPadding", False), + ) + def testTupleElements(self, param_no_padding): def build_dataset(sparse): def _generator(): @@ -364,10 +376,13 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): self.assertEqual([None, None], shapes[0].as_list()) 
self.assertEqual([None], shapes[1].as_list()) - for no_padding in (True, False): - _test_tuple_elements_by_padding(no_padding) + _test_tuple_elements_by_padding(param_no_padding) - def testBucketSparse(self): + @parameterized.named_parameters( + ("DoDropRemainder", True), + ("DoNotDropRemainder", False), + ) + def testBucketSparse(self, param_drop_remainder): """Tests bucketing of sparse tensors (case where `no_padding` == True). Test runs on following dataset: @@ -435,18 +450,17 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase): all_sparse_tensors.add(sprs_tensor) return all_sparse_tensors - for drop_remainder in (True, False): - dataset = _build_dataset() - boundaries = range(min_len + bucket_size + 1, max_len, bucket_size) - dataset = dataset.apply(grouping.bucket_by_sequence_length( - _element_length_fn, - boundaries, - [batch_size] * (len(boundaries) + 1), - no_padding=True, - drop_remainder=drop_remainder)) - batches = _compute_batches(dataset) - expected_batches = _compute_expected_batches(drop_remainder) - self.assertEqual(batches, expected_batches) + dataset = _build_dataset() + boundaries = range(min_len + bucket_size + 1, max_len, bucket_size) + dataset = dataset.apply(grouping.bucket_by_sequence_length( + _element_length_fn, + boundaries, + [batch_size] * (len(boundaries) + 1), + no_padding=True, + drop_remainder=param_drop_remainder)) + batches = _compute_batches(dataset) + expected_batches = _compute_expected_batches(param_drop_remainder) + self.assertEqual(batches, expected_batches) if __name__ == "__main__": -- GitLab From 8db73e896985d043f24592abf405fed4c867aee8 Mon Sep 17 00:00:00 2001 From: Pavel Samolysov Date: Mon, 17 Dec 2018 18:32:45 +0300 Subject: [PATCH 0033/1095] [OpenMP] Fix undeclared identifier in eigen_support.cc When OpenMP is enabled, the following error occurs during a compilation of the `tensorflow/lite/kernels/eigen_support.cc` unit: tensorflow/lite/kernels/eigen_support.cc:42:23: error: use of undeclared identifier 
'context' Eigen::setNbThreads(context->recommended_num_threads); The `SetEigenNbThreads` method already gets the number of threads as the `threads` parameter and doesn't need to calculate it using a method of the `context` variable, so the invocation of the `Eigen::setNbThreads` member must be changed a little bit. Signed-off-by: Pavel Samolysov --- tensorflow/lite/kernels/eigen_support.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/kernels/eigen_support.cc b/tensorflow/lite/kernels/eigen_support.cc index bad5975a7c..e2a2c4aac9 100644 --- a/tensorflow/lite/kernels/eigen_support.cc +++ b/tensorflow/lite/kernels/eigen_support.cc @@ -39,7 +39,7 @@ void SetEigenNbThreads(int threads) { #if defined(EIGEN_HAS_OPENMP) // The global Eigen thread count is only used when OpenMP is enabled. As this // call causes problems with tsan, make it only when OpenMP is available. - Eigen::setNbThreads(context->recommended_num_threads); + Eigen::setNbThreads(threads); #endif // defined(EIGEN_HAS_OPENMP) } -- GitLab From c5720d1626c3c7e85069b0436bffacc8c6ca8122 Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Wed, 19 Dec 2018 11:54:28 +0100 Subject: [PATCH 0034/1095] Update testBucketDropReminder test case See #24071 --- .../bucket_by_sequence_length_test.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py index e0978676fd..a95d8e1049 100644 --- a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py @@ -76,11 +76,12 @@ def _get_record_shape(sparse): @test_util.run_all_in_graph_and_eager_modes class BucketBySequenceLengthTest(test_base.DatasetTestBase, parameterized.TestCase): + # TODO(b/117581999): add eager coverage. 
@parameterized.named_parameters( ("WithoutPadding", True), ("WithPadding", False), ) - def testBucketDropReminder(self, param_no_padding): + def testSkipEagerBucketDropReminder(self, param_no_padding): boundaries = [10, 20, 30] batch_sizes = [10, 8, 4, 2] @@ -142,14 +143,15 @@ class BucketBySequenceLengthTest(test_base.DatasetTestBase, parameterized.TestCa batch_sizes, no_padding=no_padding, drop_remainder=True)) - batch, = dataset.make_one_shot_iterator().get_next() - - with self.cached_session() as sess: - batches = [] - for _ in range(n_expected_batches): - batches.append(self.evaluate(batch)) - with self.assertRaises(errors.OutOfRangeError): - self.evaluate(batch) + + get_next = self.getNext(dataset) + batches = [] + for _ in range(n_expected_batches): + batch, = self.evaluate(get_next()) + batches.append(batch) + + with self.assertRaises(errors.OutOfRangeError): + self.evaluate(get_next()) generated_lengths = [] -- GitLab From 5bbdac505efbc4decaf7f15b247a26060fe25d46 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 19 Dec 2018 12:39:03 -0800 Subject: [PATCH 0035/1095] Update lookup_ops.py --- tensorflow/python/ops/lookup_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index 9302696a45..70bbbb72e6 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -64,6 +64,7 @@ def initialize_all_tables(name="init_all_tables"): @tf_export(v1=["initializers.tables_initializer", "tables_initializer"]) def tables_initializer(name="init_all_tables"): """Returns an Op that initializes all tables of the default graph. + See the [Low Level Intro](https://www.tensorflow.org/guide/low_level_intro#feature_columns) guide, for an example of usage. 
-- GitLab From 9e92c2fe264debad1548554182dcc9af06d2e6ee Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Wed, 19 Dec 2018 13:48:25 -0800 Subject: [PATCH 0036/1095] Fix bug with renaming output bindings --- tensorflow/contrib/tensorrt/BUILD | 1 + .../contrib/tensorrt/convert/convert_nodes.cc | 17 +++++ .../tensorrt/test/identity_output_test.py | 72 +++++++++++++++++++ 3 files changed, 90 insertions(+) create mode 100644 tensorflow/contrib/tensorrt/test/identity_output_test.py diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 784acce444..3e8c486d8c 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -491,6 +491,7 @@ cuda_py_tests( "test/binary_tensor_weight_broadcast_test.py", "test/concatenation_test.py", "test/const_broadcast_test.py", + "test/identity_output_test.py", "test/manual_test.py", "test/memory_alignment_test.py", "test/multi_connection_neighbor_engine_test.py", diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index adf8831b96..729afe6c64 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -879,6 +879,8 @@ Status Converter::ConvertNode(const NodeDef& node_def) { // We need to check the name before setting it. If the input is one of the // engine input, setting the name here will overwrite engine input // bindings which will cause runtime error. + // TODO(tmorris): Remove this work-around once we use TRT's IIdentityLayer + // in ConvertIdentity. if (output.is_tensor()) { const char* tensor_name = output.tensor()->getName(); if (!tensorflow::str_util::StartsWith(tensor_name, kInputPHName)) { @@ -939,6 +941,21 @@ Status Converter::RenameAndMarkOutputTensors( if (tensor == nullptr) { return errors::NotFound("Output tensor not found: ", output.first); } + // Check if this tensor has already been marked as an output. 
+ // ConvertIdentity can cause the same tensor to be repeated in + // output_tensors, which can cause us to overwrite the name of the output + // tensor binding. For example, if we rename OutputPH_0 to OutputPH_1 then + // we won't be able to locate OutputPH_0 during runtime. To fix this, + // duplicate the tensor using no-op shuffle. + // TODO(tmorris): Remove this work-around once we use TRT's IIdentityLayer + // in ConvertIdentity. + if (tensorflow::str_util::StartsWith(tensor->getName(), kOutputPHName)) { + nvinfer1::IShuffleLayer* layer = network()->addShuffle(*tensor); + TFTRT_RETURN_ERROR_IF_NULLPTR( + layer, StrCat("Output Copy for ", tensor->getName())); + MarkQuantizationRangesAsInferrable(tensor, layer->getOutput(0)); + tensor = layer->getOutput(0); + } tensor->setName(output.second.c_str()); VLOG(1) << "Marking output tensor " << output.first << ", as output tensor " << output.second; diff --git a/tensorflow/contrib/tensorrt/test/identity_output_test.py b/tensorflow/contrib/tensorrt/test/identity_output_test.py new file mode 100644 index 0000000000..391434ba83 --- /dev/null +++ b/tensorflow/contrib/tensorrt/test/identity_output_test.py @@ -0,0 +1,72 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Model script to test TF-TensorRT integration.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.tensorrt.test import tf_trt_integration_test_base as trt_test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.platform import test + + +class IdentityTest(trt_test.TfTrtIntegrationTestBase): + + def _ConstOp(self, shape): + return constant_op.constant(np.random.randn(*shape), dtype=dtypes.float32) + + def GetParams(self): + """Testing conversion of BiasAdd MatMul in TF-TRT conversion.""" + input_name = "input" + input_dims = [100, 32] + g = ops.Graph() + with g.as_default(): + x = array_ops.placeholder( + dtype=dtypes.float32, shape=input_dims, name=input_name) + + b = self._ConstOp((32, 4)) + x1 = math_ops.matmul(x, b) + b = self._ConstOp((1, 4)) + x1 = x1 + b + + out1 = array_ops.identity(x1, name='output1') + out2 = array_ops.identity(x1, name='output2') + iden1 = array_ops.identity(x1) + out3 = array_ops.identity(iden1, name='output3') + + return trt_test.TfTrtIntegrationTestParams( + gdef=g.as_graph_def(), + input_names=[input_name], + input_dims=[input_dims], + output_names=['output1', 'output2', 'output3'], + expected_output_dims=[(100, 4), (100, 4), (100, 4)]) + + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return ["TRTEngineOp_0"] + + +if __name__ == "__main__": + test.main() -- GitLab From cfe7df2cf064f9eacdf8ab30574ebe9ec6438544 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Wed, 19 Dec 2018 14:14:28 -0800 Subject: 
[PATCH 0037/1095] Update comment in test --- tensorflow/contrib/tensorrt/test/identity_output_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/tensorrt/test/identity_output_test.py b/tensorflow/contrib/tensorrt/test/identity_output_test.py index 391434ba83..1aa05999bd 100644 --- a/tensorflow/contrib/tensorrt/test/identity_output_test.py +++ b/tensorflow/contrib/tensorrt/test/identity_output_test.py @@ -37,7 +37,7 @@ class IdentityTest(trt_test.TfTrtIntegrationTestBase): return constant_op.constant(np.random.randn(*shape), dtype=dtypes.float32) def GetParams(self): - """Testing conversion of BiasAdd MatMul in TF-TRT conversion.""" + """Testing engine with the same tensor repeated as output via identity.""" input_name = "input" input_dims = [100, 32] g = ops.Graph() -- GitLab From f03cf64b1295ea4e9e431a7a524cc23b2c1f3306 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Wed, 19 Dec 2018 15:10:58 -0800 Subject: [PATCH 0038/1095] Explain usage of shuffle layer --- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 729afe6c64..2a402fe699 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -950,6 +950,7 @@ Status Converter::RenameAndMarkOutputTensors( // TODO(tmorris): Remove this work-around once we use TRT's IIdentityLayer // in ConvertIdentity. if (tensorflow::str_util::StartsWith(tensor->getName(), kOutputPHName)) { + // Using shuffle layer for identity by not setting reshape or transpose. 
nvinfer1::IShuffleLayer* layer = network()->addShuffle(*tensor); TFTRT_RETURN_ERROR_IF_NULLPTR( layer, StrCat("Output Copy for ", tensor->getName())); -- GitLab From d489a943a0fbb995d48ee70558270c4a5f0f9c39 Mon Sep 17 00:00:00 2001 From: Taylor Thornton Date: Tue, 18 Dec 2018 22:19:59 -0800 Subject: [PATCH 0039/1095] upgrade aws-sdk-cpp to 1.5.8 in order to pick up the auth retry changeset that landed in that version --- third_party/aws/BUILD.bazel | 5 +++++ third_party/aws/workspace.bzl | 11 +++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/third_party/aws/BUILD.bazel b/third_party/aws/BUILD.bazel index 5426f79e46..1f61858777 100644 --- a/third_party/aws/BUILD.bazel +++ b/third_party/aws/BUILD.bazel @@ -80,6 +80,11 @@ cc_library( deps = [ "@curl", ], + copts = [ + "-DAWS_SDK_VERSION_MAJOR=1", + "-DAWS_SDK_VERSION_MINOR=5", + "-DAWS_SDK_VERSION_PATCH=8" + ], ) template_rule( diff --git a/third_party/aws/workspace.bzl b/third_party/aws/workspace.bzl index c216638154..10799b5153 100644 --- a/third_party/aws/workspace.bzl +++ b/third_party/aws/workspace.bzl @@ -2,14 +2,17 @@ load("//third_party:repo.bzl", "third_party_http_archive") +# NOTE: version updates here should also update the major, minor, and patch variables declared in +# the copts field of the //third_party/aws:aws target + def repo(): third_party_http_archive( name = "aws", urls = [ - "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz", - "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz", + "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.5.8.tar.gz", + "https://github.com/aws/aws-sdk-cpp/archive/1.5.8.tar.gz", ], - sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c", - strip_prefix = "aws-sdk-cpp-1.3.15", + sha256 = "89905075fe50aa13e0337ff905c2e8c1ce9caf77a3504484a7cda39179120ffc", + strip_prefix = "aws-sdk-cpp-1.5.8", build_file = "//third_party/aws:BUILD.bazel", ) -- GitLab From 
c75023169590ef0534579758285d6a1d8ad54ad2 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 19 Dec 2018 23:38:51 +0000 Subject: [PATCH 0040/1095] Replace deprecated FastGFile with GFile FastGFile has been deprecated and replaced with GFile, though the example in speech_commands still uses FastGFile. This fix addresses the issue to remove the deprecated warning: ``` WARNING:tensorflow:From :1: __init__ (from tensorflow.python.platform.gfile) is deprecated and will be removed in a future version. Instructions for updating: Use tf.gfile.GFile. ``` Signed-off-by: Yong Tang --- tensorflow/examples/speech_commands/label_wav.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/speech_commands/label_wav.py b/tensorflow/examples/speech_commands/label_wav.py index 0017aec3a5..eb8323454c 100644 --- a/tensorflow/examples/speech_commands/label_wav.py +++ b/tensorflow/examples/speech_commands/label_wav.py @@ -45,7 +45,7 @@ FLAGS = None def load_graph(filename): """Unpersists graph from file as default graph.""" - with tf.gfile.FastGFile(filename, 'rb') as f: + with tf.gfile.GFile(filename, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) tf.import_graph_def(graph_def, name='') -- GitLab From 4696da4bf9a586cf250b32a062da833af3ffd4a9 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 19 Dec 2018 23:41:52 +0000 Subject: [PATCH 0041/1095] Also replace FastGFile to GFile in label_wav_dir.py Signed-off-by: Yong Tang --- tensorflow/examples/speech_commands/label_wav_dir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/examples/speech_commands/label_wav_dir.py b/tensorflow/examples/speech_commands/label_wav_dir.py index a34db512dd..2e1890c3e8 100644 --- a/tensorflow/examples/speech_commands/label_wav_dir.py +++ b/tensorflow/examples/speech_commands/label_wav_dir.py @@ -46,7 +46,7 @@ FLAGS = None def load_graph(filename): """Unpersists graph from file as default graph.""" - with
tf.gfile.FastGFile(filename, 'rb') as f: + with tf.gfile.GFile(filename, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) tf.import_graph_def(graph_def, name='') -- GitLab From 0f10d9fcd0c47b497d1bd5069f28d21f94fedc60 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Wed, 19 Dec 2018 15:56:19 -0800 Subject: [PATCH 0042/1095] Improve comment --- tensorflow/contrib/tensorrt/test/identity_output_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tensorrt/test/identity_output_test.py b/tensorflow/contrib/tensorrt/test/identity_output_test.py index 1aa05999bd..2be9da9ede 100644 --- a/tensorflow/contrib/tensorrt/test/identity_output_test.py +++ b/tensorflow/contrib/tensorrt/test/identity_output_test.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Model script to test TF-TensorRT integration.""" +"""This test checks a situation where the same tensor is considered as an output +multiple times because it has been duplicated by 2+ indentity ops. Previously, +the tensor would be renamed multiple times, overwriting the output binding name +which resulted in a runtime error when the binding would not be found. +""" from __future__ import absolute_import from __future__ import division -- GitLab From 915b8783a05b0da7c30ba36531ce03c811930852 Mon Sep 17 00:00:00 2001 From: Clayne Robison Date: Wed, 19 Dec 2018 19:13:27 -0700 Subject: [PATCH 0043/1095] [Intel MKL] Remove TensorFlow lite test from the public CI. MKL does not support TF lite. 
--- tensorflow/tools/ci_build/linux/cpu/run_mkl.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh b/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh index 7be5f454ec..a8b73cbe0c 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_mkl.sh @@ -36,4 +36,4 @@ yes "" | $PYTHON_BIN_PATH configure.py bazel test --test_tag_filters=-no_oss,-oss_serial,-gpu,-benchmark-test --test_lang_filters=cc,py -k \ --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 --build_tests_only \ --config=mkl --test_env=KMP_BLOCKTIME=0 --config=opt --test_output=errors -- \ - //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... + //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... -//tensorflow/lite/... -- GitLab From f4867d3e4fecdb1ca8a445addad383aa0292a3b3 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 19 Dec 2018 19:40:22 -0800 Subject: [PATCH 0044/1095] Initialize only CPU devices in optimize_dataset_op --- tensorflow/core/grappler/grappler_item_builder.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index 9224ee7849..a984efd10d 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -103,7 +103,11 @@ Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def, // Instantiate all variables for function library runtime creation. std::vector> devices; - TF_RETURN_IF_ERROR(DeviceFactory::AddDevices( + // Only CPU device is used so instead of calling DeviceFactory::AddDevices() + // with dummy session config, which will conflict with user defined options and + // create unwanted devices, call cpu_factory->CreateDevices() to get CPU only devices. 
+ DeviceFactory* cpu_factory = DeviceFactory::GetFactory("CPU"); + TF_RETURN_IF_ERROR(cpu_factory->CreateDevices( options, "/job:localhost/replica:0/task:0", &devices)); Device* cpu_device = devices[0].get(); std::unique_ptr dvc_mgr(new DeviceMgr(std::move(devices))); -- GitLab From 85ef6de9fcb5977bd738e10264f0641869594b83 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 19 Dec 2018 19:52:47 -0800 Subject: [PATCH 0045/1095] Fix clang format --- tensorflow/core/grappler/grappler_item_builder.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index a984efd10d..fc55fb5b3d 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -104,8 +104,9 @@ Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def, // Instantiate all variables for function library runtime creation. std::vector> devices; // Only CPU device is used so instead of calling DeviceFactory::AddDevices() - // with dummy session config, which will conflict with user defined options and - // create unwanted devices, call cpu_factory->CreateDevices() to get CPU only devices. + // with dummy session config, which will conflict with user defined options + // and create unwanted devices, call cpu_factory->CreateDevices() to get CPU + // only devices. 
DeviceFactory* cpu_factory = DeviceFactory::GetFactory("CPU"); TF_RETURN_IF_ERROR(cpu_factory->CreateDevices( options, "/job:localhost/replica:0/task:0", &devices)); -- GitLab From 2565842e71e1665d8a4fc126edc40b37becfed71 Mon Sep 17 00:00:00 2001 From: Innovimax Date: Thu, 20 Dec 2018 14:11:27 +0100 Subject: [PATCH 0046/1095] fix typo --- tensorflow/java/src/gen/cc/op_gen_main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/java/src/gen/cc/op_gen_main.cc b/tensorflow/java/src/gen/cc/op_gen_main.cc index 0d9e0883af..cf4bb03dad 100644 --- a/tensorflow/java/src/gen/cc/op_gen_main.cc +++ b/tensorflow/java/src/gen/cc/op_gen_main.cc @@ -35,7 +35,7 @@ const char kUsageHeader[] = "graph.\n\n" "Operation wrappers are generated under the path specified by the " "'--output_dir' argument. This path can be absolute or relative to the\n" - "current working directory and will be created if it does not exists.\n\n" + "current working directory and will be created if it does not exist.\n\n" "Note that the operations will not be available through the " "'org.tensorflow.op.Ops' API until the generated classes are compiled\n" "using an appropriate annotation processor.\n\n" -- GitLab From 6cbd27b8ffb0ee33ae0f3151a202ba8dd0fa22a9 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Thu, 20 Dec 2018 10:26:17 -0800 Subject: [PATCH 0047/1095] Only apply size limit if size of tensor is increasing --- tensorflow/core/grappler/optimizers/constant_folding.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 3882e3b3a9..bb7ed05e33 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -913,6 +913,7 @@ Status ConstantFolding::CreateNodeDef(const string& name, AttrValue attr_tensor; TensorProto* t = attr_tensor.mutable_tensor(); bool optimized = 
false; + const size_t original_size = tensor->TotalBytes(); size_t encoded_size; // Use the packed representation whenever possible to avoid generating large // graphdefs. Moreover, avoid repeating the last values if they're equal. @@ -980,11 +981,11 @@ Status ConstantFolding::CreateNodeDef(const string& name, } node->mutable_attr()->insert({"value", attr_tensor}); - if (encoded_size < 10 * 1024 * 1024) { - return Status::OK(); + if (encoded_size > original_size && encoded_size >= 10 * 1024 * 1024) { + return errors::InvalidArgument( + strings::StrCat("Can't fold ", name, ", its size would be too large")); } - return errors::InvalidArgument( - strings::StrCat("Can't fold ", name, ", its size would be too large")); + return Status::OK(); } Status ConstantFolding::EvaluateNode(const NodeDef& node, -- GitLab From a9ba4a47efb80d2951ee6deea8e2d8de56dbaaf9 Mon Sep 17 00:00:00 2001 From: vitor-alves Date: Thu, 20 Dec 2018 17:26:45 -0200 Subject: [PATCH 0048/1095] Typo --- tensorflow/lite/examples/label_image/label_image.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/examples/label_image/label_image.md b/tensorflow/lite/examples/label_image/label_image.md index fd9f49918b..debe8ffbc5 100644 --- a/tensorflow/lite/examples/label_image/label_image.md +++ b/tensorflow/lite/examples/label_image/label_image.md @@ -51,7 +51,7 @@ average time: 100.986 ms 0.0235294: 514 cornet 0.0196078: 835 suit ``` -Run `interpreter->Invoker()` 100 times: +Run `interpreter->Invoke()` 100 times: ``` > ./label_image -c 100 Loaded model ./mobilenet_quant_v1_224.tflite -- GitLab From beed5ef6fc98f513f368e68ae08357a8db623ef0 Mon Sep 17 00:00:00 2001 From: Pooya Davoodi Date: Thu, 20 Dec 2018 11:38:58 -0800 Subject: [PATCH 0049/1095] TFTRT: Convert between str and unicode in py2 Also check for types before doing conversion. 
--- tensorflow/contrib/tensorrt/python/trt_convert.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 203b2697ba..3d2b6b499d 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -46,11 +46,13 @@ from tensorflow.python.saved_model import tag_constants from tensorflow.python.training import saver if _six.PY2: - _to_bytes = lambda s: s - _to_string = lambda s: s + _to_bytes = lambda s: s.encode("utf-8", errors="surrogateescape") \ + if isinstance(s, unicode) else s + _to_string = lambda s: s.decode("utf-8") if isinstance(s, str) else s else: - _to_bytes = lambda s: s.encode("utf-8", errors="surrogateescape") - _to_string = lambda s: s.decode("utf-8") + _to_bytes = lambda s: s.encode("utf-8", errors="surrogateescape") \ + if isinstance(s, str) else s + _to_string = lambda s: s.decode("utf-8") if isinstance(s, bytes) else s class TrtPrecisionMode(object): -- GitLab From 58621c1db6ee0ac52aab0ff868f666454d2aa5a3 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Thu, 20 Dec 2018 11:47:58 -0800 Subject: [PATCH 0050/1095] Calculate input size correctly --- .../core/grappler/optimizers/constant_folding.cc | 11 ++++++----- .../core/grappler/optimizers/constant_folding.h | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index bb7ed05e33..5a053d3a89 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -902,7 +902,8 @@ DataType GetDataTypeFromNodeOrProps(const NodeDef& node, // static Status ConstantFolding::CreateNodeDef(const string& name, const TensorValue& tensor, - NodeDef* node) { + NodeDef* node, + size_t input_size) { node->set_name(name); 
node->set_op("Const"); @@ -913,7 +914,6 @@ Status ConstantFolding::CreateNodeDef(const string& name, AttrValue attr_tensor; TensorProto* t = attr_tensor.mutable_tensor(); bool optimized = false; - const size_t original_size = tensor->TotalBytes(); size_t encoded_size; // Use the packed representation whenever possible to avoid generating large // graphdefs. Moreover, avoid repeating the last values if they're equal. @@ -980,8 +980,7 @@ Status ConstantFolding::CreateNodeDef(const string& name, encoded_size = t->tensor_content().size(); } node->mutable_attr()->insert({"value", attr_tensor}); - - if (encoded_size > original_size && encoded_size >= 10 * 1024 * 1024) { + if (encoded_size > input_size && encoded_size >= 10 * 1024 * 1024) { return errors::InvalidArgument( strings::StrCat("Can't fold ", name, ", its size would be too large")); } @@ -1011,6 +1010,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node, } }); + size_t input_size = 0; for (const auto& input : node.input()) { const TensorId input_tensor = ParseTensorName(input); if (input_tensor.index() < 0) { @@ -1025,6 +1025,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node, } TF_RETURN_IF_ERROR(CheckAttrExists(*input_node, "value")); const TensorProto& raw_val = input_node->attr().at("value").tensor(); + input_size += raw_val.tensor_content().size(); Tensor* value = new Tensor(raw_val.dtype(), raw_val.tensor_shape()); CHECK(value->FromProto(raw_val)); inputs.emplace_back(value); @@ -1042,7 +1043,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node, node_name = strings::StrCat(node_name, "-", i); } if (output_tensors[i].tensor) { - Status s = CreateNodeDef(node_name, output_tensors[i], &outputs->at(i)); + Status s = CreateNodeDef(node_name, output_tensors[i], &outputs->at(i), input_size); if (!s.ok()) { *result_too_large = true; return s; diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h 
index 0b778882d7..8843ac161f 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -36,7 +36,7 @@ const char kConstantFoldingCtrl[] = "ConstantFoldingCtrl"; class ConstantFolding : public GraphOptimizer { public: static Status CreateNodeDef(const string& name, const TensorValue& tensor, - NodeDef* node); + NodeDef* node, size_t input_size = 0); static string AddControlDependency(const string& input_name, GraphDef* graph, NodeMap* node_map); -- GitLab From b71a6b031adf0da61db59312847cccb675771b84 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Thu, 20 Dec 2018 14:51:38 -0800 Subject: [PATCH 0051/1095] Fix input size, add comment --- tensorflow/core/grappler/optimizers/constant_folding.cc | 7 ++++--- tensorflow/core/grappler/optimizers/constant_folding.h | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 5a053d3a89..a5f6f57747 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1010,7 +1010,7 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node, } }); - size_t input_size = 0; + size_t total_inputs_size = 0; for (const auto& input : node.input()) { const TensorId input_tensor = ParseTensorName(input); if (input_tensor.index() < 0) { @@ -1025,10 +1025,10 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node, } TF_RETURN_IF_ERROR(CheckAttrExists(*input_node, "value")); const TensorProto& raw_val = input_node->attr().at("value").tensor(); - input_size += raw_val.tensor_content().size(); Tensor* value = new Tensor(raw_val.dtype(), raw_val.tensor_shape()); CHECK(value->FromProto(raw_val)); inputs.emplace_back(value); + total_inputs_size += value->TotalBytes(); } TF_RETURN_IF_ERROR(EvaluateNode(node, inputs, &output_tensors)); @@ -1043,7 +1043,8 @@ 
Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node, node_name = strings::StrCat(node_name, "-", i); } if (output_tensors[i].tensor) { - Status s = CreateNodeDef(node_name, output_tensors[i], &outputs->at(i), input_size); + Status s = CreateNodeDef(node_name, output_tensors[i], &outputs->at(i), + total_inputs_size); if (!s.ok()) { *result_too_large = true; return s; diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 8843ac161f..58a962ba9b 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -35,6 +35,8 @@ const char kConstantFoldingCtrl[] = "ConstantFoldingCtrl"; // Constant folding optimization for a graph. class ConstantFolding : public GraphOptimizer { public: + // The size limit will only be considered if the newly created node is greater + // than input_size (optional). static Status CreateNodeDef(const string& name, const TensorValue& tensor, NodeDef* node, size_t input_size = 0); static string AddControlDependency(const string& input_name, GraphDef* graph, -- GitLab From 676e0be5b166d08e4af5c3044e889c66de6837c9 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Thu, 20 Dec 2018 15:16:11 -0800 Subject: [PATCH 0052/1095] Rename input_size -> original_size --- tensorflow/core/grappler/optimizers/constant_folding.cc | 5 +++-- tensorflow/core/grappler/optimizers/constant_folding.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index a5f6f57747..89be08800d 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -903,7 +903,7 @@ DataType GetDataTypeFromNodeOrProps(const NodeDef& node, Status ConstantFolding::CreateNodeDef(const string& name, const TensorValue& tensor, NodeDef* node, - 
size_t input_size) { + size_t original_size) { node->set_name(name); node->set_op("Const"); @@ -980,7 +980,8 @@ Status ConstantFolding::CreateNodeDef(const string& name, encoded_size = t->tensor_content().size(); } node->mutable_attr()->insert({"value", attr_tensor}); - if (encoded_size > input_size && encoded_size >= 10 * 1024 * 1024) { + + if (encoded_size > original_size && encoded_size >= 10 * 1024 * 1024) { return errors::InvalidArgument( strings::StrCat("Can't fold ", name, ", its size would be too large")); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 58a962ba9b..bb86c4aeb3 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -36,9 +36,9 @@ const char kConstantFoldingCtrl[] = "ConstantFoldingCtrl"; class ConstantFolding : public GraphOptimizer { public: // The size limit will only be considered if the newly created node is greater - // than input_size (optional). + // than original_size (optional). 
static Status CreateNodeDef(const string& name, const TensorValue& tensor, - NodeDef* node, size_t input_size = 0); + NodeDef* node, size_t original_size = 0); static string AddControlDependency(const string& input_name, GraphDef* graph, NodeMap* node_map); -- GitLab From 79bc4f4bf914d4b94eef89e8df2e19ff54c2db71 Mon Sep 17 00:00:00 2001 From: Siju Samuel Date: Fri, 21 Dec 2018 10:31:19 +0530 Subject: [PATCH 0053/1095] Review comments updated, nnapi removed for ceil --- .../lite/delegates/nnapi/nnapi_delegate.cc | 10 ---- .../delegates/nnapi/nnapi_delegate_test.cc | 49 ------------------- tensorflow/lite/kernels/BUILD | 1 - .../internal/optimized/legacy_optimized_ops.h | 6 --- .../internal/reference/legacy_reference_ops.h | 6 --- tensorflow/lite/nnapi/NeuralNetworksShim.h | 1 - tensorflow/lite/nnapi_delegate.cc | 3 -- 7 files changed, 76 deletions(-) diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc index cac98ae3da..4fe07004a8 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate.cc @@ -638,16 +638,6 @@ class NNAPIDelegateKernel { return nullptr; } break; - case kTfLiteBuiltinCeil: - if (version == 1) { - return [](const NNAPIOpMappingArgs& mapping_args) - -> ANeuralNetworksOperationType { - return ANEURALNETWORKS_CEIL; - }; - } else { - return nullptr; - } - break; case kTfLiteBuiltinRelu: if (version == 1) { return [](const NNAPIOpMappingArgs& mapping_args) diff --git a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc index f6a04e36cd..ca48af0c95 100644 --- a/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc +++ b/tensorflow/lite/delegates/nnapi/nnapi_delegate_test.cc @@ -1024,55 +1024,6 @@ TEST(NNAPIDelegate, FloorMultiDims) { EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 5})); } -class CeilOpModel : public SingleOpModelWithNNAPI { - public: - 
CeilOpModel(std::initializer_list input_shape, TensorType input_type) { - input_ = AddInput(TensorType_FLOAT32); - output_ = AddOutput(TensorType_FLOAT32); - SetBuiltinOp(BuiltinOperator_CEIL, BuiltinOptions_NONE, 0); - BuildInterpreter({ - input_shape, - }); - } - - int input() { return input_; } - - std::vector GetOutput() { return ExtractVector(output_); } - std::vector GetOutputShape() { return GetTensorShape(output_); } - - private: - int input_; - int output_; -}; - -TEST(NNAPIDelegate, CeilSingleDim) { - CeilOpModel model({2}, TensorType_FLOAT32); - model.PopulateTensor(model.input(), {8.5, 0.0}); - model.Invoke(); - EXPECT_THAT(model.GetOutput(), ElementsAreArray({8, 0})); - EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2})); -} - -TEST(NNAPIDelegate, CeilMultiDims) { - CeilOpModel model({2, 1, 1, 5}, TensorType_FLOAT32); - model.PopulateTensor(model.input(), { - 0.0001, - 8.0001, - 0.9999, - 9.9999, - 0.5, - -0.0001, - -8.0001, - -0.9999, - -9.9999, - -0.5, - }); - model.Invoke(); - EXPECT_THAT(model.GetOutput(), - ElementsAreArray({1, 9, 1, 10, 1, 0, -8, 0, -9, 0})); - EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 1, 5})); -} - class LocalResponseNormOpModel : public SingleOpModelWithNNAPI { public: LocalResponseNormOpModel(std::initializer_list input_shape, int radius, diff --git a/tensorflow/lite/kernels/BUILD b/tensorflow/lite/kernels/BUILD index 71d06ba4d7..1d53022e03 100644 --- a/tensorflow/lite/kernels/BUILD +++ b/tensorflow/lite/kernels/BUILD @@ -605,7 +605,6 @@ tf_cc_test( size = "small", srcs = ["ceil_test.cc"], tags = [ - "no_oss", "tflite_not_portable_ios", ], deps = [ diff --git a/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h index a76649f934..5485d907c2 100644 --- a/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h @@ -1740,12 +1740,6 @@ inline 
void Floor(const float* input_data, const Dims<4>& input_dims, output_data); } -inline void Ceil(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Ceil(DimsToShape(input_dims), input_data, DimsToShape(output_dims), - output_data); -} - inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims, const int32* output_size_data, const Dims<4>& output_size_dims, float* output_data, diff --git a/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h index 431e2413e8..380fc8f98e 100644 --- a/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h +++ b/tensorflow/lite/kernels/internal/reference/legacy_reference_ops.h @@ -1883,12 +1883,6 @@ inline void Floor(const float* input_data, const Dims<4>& input_dims, output_data); } -inline void Ceil(const float* input_data, const Dims<4>& input_dims, - float* output_data, const Dims<4>& output_dims) { - Ceil(DimsToShape(input_dims), input_data, DimsToShape(output_dims), - output_data); -} - template inline void ResizeBilinear(const T* input_data, const Dims<4>& input_dims, const int32* output_size_data, diff --git a/tensorflow/lite/nnapi/NeuralNetworksShim.h b/tensorflow/lite/nnapi/NeuralNetworksShim.h index 82c5840952..c39502f4ac 100644 --- a/tensorflow/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/lite/nnapi/NeuralNetworksShim.h @@ -143,7 +143,6 @@ enum { ANEURALNETWORKS_STRIDED_SLICE = 35, ANEURALNETWORKS_SUB = 36, ANEURALNETWORKS_TRANSPOSE = 37, - ANEURALNETWORKS_CEIL = 38, }; /** diff --git a/tensorflow/lite/nnapi_delegate.cc b/tensorflow/lite/nnapi_delegate.cc index dfbb4813ad..26d75696a1 100644 --- a/tensorflow/lite/nnapi_delegate.cc +++ b/tensorflow/lite/nnapi_delegate.cc @@ -489,9 +489,6 @@ TfLiteStatus AddOpsAndParams( case tflite::BuiltinOperator_FLOOR: nn_op_type = ANEURALNETWORKS_FLOOR; break; - case tflite::BuiltinOperator_CEIL: - nn_op_type = 
ANEURALNETWORKS_CEIL; - break; case tflite::BuiltinOperator_LOGISTIC: nn_op_type = ANEURALNETWORKS_LOGISTIC; break; -- GitLab From 8da29925c65dc72b49f693942923519b38dd0242 Mon Sep 17 00:00:00 2001 From: Yves-Noel Weweler Date: Fri, 21 Dec 2018 15:58:22 +0100 Subject: [PATCH 0054/1095] Fix pylint warnings from Ubuntu Sanity test --- .../kernel_tests/bucket_by_sequence_length_test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py index a95d8e1049..4f1ea9f2ee 100644 --- a/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py +++ b/tensorflow/python/data/experimental/kernel_tests/bucket_by_sequence_length_test.py @@ -74,12 +74,13 @@ def _get_record_shape(sparse): @test_util.run_all_in_graph_and_eager_modes -class BucketBySequenceLengthTest(test_base.DatasetTestBase, parameterized.TestCase): +class BucketBySequenceLengthTest(test_base.DatasetTestBase, + parameterized.TestCase): # TODO(b/117581999): add eager coverage. 
@parameterized.named_parameters( - ("WithoutPadding", True), - ("WithPadding", False), + ("WithoutPadding", True), + ("WithPadding", False), ) def testSkipEagerBucketDropReminder(self, param_no_padding): -- GitLab From 7859702e8a7093f242c167ea027cc227f1f9d048 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Fri, 21 Dec 2018 08:57:29 -0800 Subject: [PATCH 0055/1095] Fix failing test in TRT4.0 - TRT bug with tensors rank < 3 in INT8 mode --- tensorflow/contrib/tensorrt/test/identity_output_test.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/contrib/tensorrt/test/identity_output_test.py b/tensorflow/contrib/tensorrt/test/identity_output_test.py index 2be9da9ede..da7d70876d 100644 --- a/tensorflow/contrib/tensorrt/test/identity_output_test.py +++ b/tensorflow/contrib/tensorrt/test/identity_output_test.py @@ -71,6 +71,11 @@ class IdentityTest(trt_test.TfTrtIntegrationTestBase): """Return the expected engines to build.""" return ["TRTEngineOp_0"] + def ShouldRunTest(self, run_params): + """Whether to run the test.""" + # TODO(aaroey): Trt 4.0 forbids conversion for tensors with rank <3 in int8 + # mode, which is a bug. Re-enable this when trt library is fixed. 
+ return not trt_test.IsQuantizationMode(run_params.precision_mode) if __name__ == "__main__": test.main() -- GitLab From 753a3a3d7fa79e390ac8c068dc52acd03e0e4187 Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Fri, 21 Dec 2018 09:07:40 -0800 Subject: [PATCH 0056/1095] Report size and size limit with error message --- tensorflow/core/grappler/optimizers/constant_folding.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 89be08800d..8804dedad1 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -983,7 +983,8 @@ Status ConstantFolding::CreateNodeDef(const string& name, if (encoded_size > original_size && encoded_size >= 10 * 1024 * 1024) { return errors::InvalidArgument( - strings::StrCat("Can't fold ", name, ", its size would be too large")); + strings::StrCat("Can't fold ", name, ", its size would be too large (", + encoded_size, " >= ", 10 * 1024 * 1024, " bytes)")); } return Status::OK(); } -- GitLab From ab72f61de52f59887d262b23543cc5d5508e753e Mon Sep 17 00:00:00 2001 From: Trevor Morris Date: Fri, 21 Dec 2018 14:18:09 -0800 Subject: [PATCH 0057/1095] Fix clang-format --- tensorflow/core/grappler/optimizers/constant_folding.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 8804dedad1..adaf3cd07a 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -901,8 +901,7 @@ DataType GetDataTypeFromNodeOrProps(const NodeDef& node, // static Status ConstantFolding::CreateNodeDef(const string& name, - const TensorValue& tensor, - NodeDef* node, + const TensorValue& tensor, NodeDef* node, size_t original_size) { node->set_name(name); 
node->set_op("Const"); -- GitLab From 6bd27704c472b8d1cbd64b600fe00f223e74b4f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=8C=AF=E5=8D=8E=20=28WANG=20Zhenhua=29?= Date: Sat, 22 Dec 2018 10:35:47 +0800 Subject: [PATCH 0058/1095] lite: remove memset in resize bilinear opt op memset() is not necessarily needed here since every element in output_data memory will be updated. --- tensorflow/lite/kernels/internal/optimized/optimized_ops.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index 7bc6b324c5..a5374827e1 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -5387,9 +5387,6 @@ inline void ResizeBilinearGenericSmallChannel( int32 output_height, int32 output_width, float height_scale, float width_scale, const RuntimeShape& input_shape, const T* input_data, const RuntimeShape& output_shape, T* output_data) { - memset(output_data, 0, - batches * output_height * output_width * depth * sizeof(T)); - T* output_ptr = &output_data[0]; for (int b = 0; b < batches; ++b) { for (int y = 0; y < output_height; ++y) { -- GitLab From 315e009f917aebe04a754a046a57cb3ac1d10557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=8C=AF=E5=8D=8E=20=28WANG=20Zhenhua=29?= Date: Sat, 22 Dec 2018 10:36:50 +0800 Subject: [PATCH 0059/1095] lite: perform std::floor before cast float to int This won't change the resulted value, but to have same code style in the context and reference_ops. 
--- tensorflow/lite/kernels/internal/optimized/optimized_ops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h index a5374827e1..8f09fab4ca 100644 --- a/tensorflow/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/lite/kernels/internal/optimized/optimized_ops.h @@ -5395,7 +5395,7 @@ inline void ResizeBilinearGenericSmallChannel( int32 y1 = std::min(y0 + 1, input_height - 1); for (int x = 0; x < output_width; ++x) { float input_x = x * width_scale; - int32 x0 = static_cast(input_x); + int32 x0 = static_cast(std::floor((input_x))); int32 x1 = std::min(x0 + 1, input_width - 1); int32 input_offset[4] = {Offset(input_shape, b, y0, x0, 0), -- GitLab From 6ecb77b9bd800e2beda7a7fcefaf0ac3a6363e1f Mon Sep 17 00:00:00 2001 From: Samantha Andow Date: Wed, 26 Dec 2018 08:58:44 -0800 Subject: [PATCH 0060/1095] Fix iterable bug --- tensorflow/java/src/gen/cc/op_specs.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/java/src/gen/cc/op_specs.cc b/tensorflow/java/src/gen/cc/op_specs.cc index 4f5a491d25..4024efedef 100644 --- a/tensorflow/java/src/gen/cc/op_specs.cc +++ b/tensorflow/java/src/gen/cc/op_specs.cc @@ -91,11 +91,6 @@ class TypeResolver { Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) { *iterable_out = false; - if (!arg_def.number_attr().empty()) { - // when number_attr is set, argument has to be a list of tensors - *iterable_out = true; - visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int())); - } Type type = Type::Wildcard(); if (arg_def.type() != DataType::DT_INVALID) { type = Type::ForDataType(arg_def.type()); @@ -122,6 +117,11 @@ Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) { LOG(FATAL) << "Cannot resolve data type of argument \"" << arg_def.name() << "\" in operation \"" << op_def_.name() << "\""; } 
+ if (!arg_def.number_attr().empty()) { + // when number_attr is set, argument has to be a list of tensors + *iterable_out = true; + visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int())); + } return type; } -- GitLab From 17bc7e61e566db5b3288b2afe6dd8cbc1152c2f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Tue, 18 Dec 2018 08:51:25 +0800 Subject: [PATCH 0061/1095] ENH: The gradient op of bias_add supports 3/4/5D NCHW format --- tensorflow/core/kernels/bias_op.cc | 46 +++++++++---------- tensorflow/core/kernels/bias_op_gpu.cu.cc | 4 +- tensorflow/core/kernels/bias_op_gpu.h | 2 +- .../python/kernel_tests/bias_op_test.py | 38 ++++++++++++--- tensorflow/python/ops/nn_grad.py | 6 +-- 5 files changed, 58 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/kernels/bias_op.cc b/tensorflow/core/kernels/bias_op.cc index d4f4b43d63..df12dafdeb 100644 --- a/tensorflow/core/kernels/bias_op.cc +++ b/tensorflow/core/kernels/bias_op.cc @@ -18,13 +18,13 @@ limitations under the License. 
#define EIGEN_USE_THREADS #include "tensorflow/core/kernels/bias_op.h" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/util/tensor_format.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA #include "tensorflow/core/kernels/bias_op_gpu.h" @@ -153,13 +153,13 @@ class BiasOp : public BinaryOp { bias.tensor().reshape(four_dims).broadcast(broad_cast_dims); } break; case 5: { - Eigen::DSizes four_dims(1, channel, 1, 1, 1); + Eigen::DSizes five_dims(1, channel, 1, 1, 1); Eigen::DSizes broad_cast_dims(batch, 1, height, width, depth); const Device& d = context->eigen_device(); output->tensor().device(d) = input.tensor() + - bias.tensor().reshape(four_dims).broadcast(broad_cast_dims); + bias.tensor().reshape(five_dims).broadcast(broad_cast_dims); } break; default: OP_REQUIRES(context, false, @@ -269,28 +269,24 @@ class BiasGradOp : public OpKernel { output->template flat().setZero(); } else { // Added by intel_tf to support NCHW on CPU regardless of MKL used or not. - // TODO(yongtang): Add 3/4/5 dimensional data support for NCHW format. 
if (data_format_ == FORMAT_NCHW) { - OP_REQUIRES(context, output_backprop.dims() == 4, - errors::InvalidArgument( - "NCHW format supports only 4D input/output tensor.")); - Eigen::DSizes four_dims(batch, channel, height, width); + Eigen::DSizes three_dims(batch, channel, + height * width * depth); #ifdef EIGEN_HAS_INDEX_LIST using idx0 = Eigen::type2index<0>; using idx2 = Eigen::type2index<2>; - using idx3 = Eigen::type2index<3>; - Eigen::IndexList reduction_axes; + Eigen::IndexList reduction_axes; #else - Eigen::array reduction_axes = {0, 2, 3}; + Eigen::array reduction_axes = {0, 2}; #endif output->template flat().device(context->eigen_device()) = output_backprop.flat() .template cast::type>() - .reshape(four_dims) + .reshape(three_dims) .sum(reduction_axes) .template cast(); // End of code by intel_tf. } else { - Eigen::DSizes two_dims(batch * height * width, + Eigen::DSizes two_dims(batch * height * width * depth, channel); #ifdef EIGEN_HAS_INDEX_LIST Eigen::IndexList > reduction_axis; @@ -496,21 +492,21 @@ class BiasGradOp : public OpKernel { void ComputeWithCustomKernel(OpKernelContext* context, const Tensor& output_backprop, int32 batch, - int32 width, int32 height, int32 channel, - Tensor* output) { + int32 width, int32 height, int32 depth, + int32 channel, Tensor* output) { BiasGradGPU::compute(context->template eigen_device(), output_backprop.template flat().data(), output->flat().data(), batch, width, height, - channel, data_format_); + depth, channel, data_format_); } void ComputeWithReduceSum(OpKernelContext* context, const Tensor& output_backprop, int32 batch, - int32 width, int32 height, int32 channel, - Tensor* output) { + int32 width, int32 height, int32 depth, + int32 channel, Tensor* output) { if (data_format_ == FORMAT_NCHW) { int32 row_count = batch * channel; - int32 col_count = height * width; + int32 col_count = height * width * depth; Tensor temp_grad_outputs; // For 'NCHW' format, we perform reduction twice: first HW, then N. 
TensorShape temp_grad_output_shape{row_count, col_count}; @@ -528,7 +524,7 @@ class BiasGradOp : public OpKernel { row_count, col_count); } else { // For 'NHWC', we simply apply reduction once on NHW. - int32 row_count = batch * height * width; + int32 row_count = batch * height * width * depth; int32 col_count = channel; BiasGradGPU::DoColReduction( context, const_cast(output->flat().data()), @@ -561,7 +557,7 @@ class BiasGradOp : public OpKernel { int device_id = stream->parent()->device_ordinal(); DataType dtype = output_backprop.dtype(); BiasAddParams bias_parameters = { - {batch, height * width, channel}, + {batch, height * width * depth, channel}, data_format_, dtype, device_id, @@ -576,7 +572,7 @@ class BiasGradOp : public OpKernel { stream->InitTimer(&timer); stream->ThenStartTimer(&timer); ComputeWithCustomKernel(context, output_backprop, batch, width, height, - channel, output); + depth, channel, output); stream->ThenStopTimer(&timer); uint64 elapsed_microseconds = timer.Microseconds(); VLOG(1) << "BiasAddGrad " << bias_parameters.ToString() @@ -589,7 +585,7 @@ class BiasGradOp : public OpKernel { // Try reduction and profile. stream->ThenStartTimer(&timer); ComputeWithReduceSum(context, output_backprop, batch, width, height, - channel, output); + depth, channel, output); stream->ThenStopTimer(&timer); elapsed_microseconds = timer.Microseconds(); @@ -610,11 +606,11 @@ class BiasGradOp : public OpKernel { // Choose the best algorithm based on autotune results. if (algo_config.get_mode() == BiasAddGradGPUMode::kReduction) { ComputeWithReduceSum(context, output_backprop, batch, width, height, - channel, output); + depth, channel, output); } else { // Default to the customized kernel. 
ComputeWithCustomKernel(context, output_backprop, batch, width, height, - channel, output); + depth, channel, output); } } diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc index 24fea8a8e6..006fa1dc71 100644 --- a/tensorflow/core/kernels/bias_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc @@ -195,10 +195,10 @@ __global__ void BiasGradNCHW_SharedAtomics(const T* output_backprop, template void BiasGradGPU::compute(const GPUDevice& d, const T* output_backprop, T* bias_backprop, int32 batch, int32 height, - int32 width, int32 channel, + int32 width, int32 depth, int32 channel, TensorFormat data_format) { const int32 bias_size = channel; - const int32 image_size = height * width; + const int32 image_size = height * width * depth; const int32 total_count = batch * bias_size * image_size; if (total_count == 0) { return; diff --git a/tensorflow/core/kernels/bias_op_gpu.h b/tensorflow/core/kernels/bias_op_gpu.h index a0b2ce4f9b..372a403e68 100644 --- a/tensorflow/core/kernels/bias_op_gpu.h +++ b/tensorflow/core/kernels/bias_op_gpu.h @@ -39,7 +39,7 @@ template struct BiasGradGPU { static void compute(const GPUDevice& device, const T* output_backprop, T* bias_backprop, int32 batch, int32 height, int32 width, - int32 channel, TensorFormat data_format); + int32 depth, int32 channel, TensorFormat data_format); static void DoRowReduction(OpKernelContext* context, T* output, const T* input, int rows, int cols); diff --git a/tensorflow/python/kernel_tests/bias_op_test.py b/tensorflow/python/kernel_tests/bias_op_test.py index 66f442dbdd..c3976194a0 100644 --- a/tensorflow/python/kernel_tests/bias_op_test.py +++ b/tensorflow/python/kernel_tests/bias_op_test.py @@ -196,9 +196,7 @@ class BiasAddTest(test.TestCase): self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold, threshold) @test_util.run_deprecated_v1 - def testGradientTensor(self): - # TODO(yongtang): BiasAddGrad with NCHW only works 4D. 
Reenable once - # all dimensions are supported. + def testGradientTensor2D(self): for (data_format, use_gpu) in ("NHWC", False), ("NHWC", True): for dtype in (dtypes.float16, dtypes.float32, dtypes.float64): np_input = np.array( @@ -207,9 +205,19 @@ class BiasAddTest(test.TestCase): bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype) self._testGradient(np_input, bias, dtype, data_format, use_gpu) + @test_util.run_deprecated_v1 + def testGradientTensor3D(self): + for (data_format, use_gpu) in [("NHWC", False), ("NHWC", True), + ("NCHW", False), ("NCHW", True)]: + for dtype in (dtypes.float16, dtypes.float32, dtypes.float64): + np_input = np.array( + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], + dtype=dtype.as_numpy_dtype).reshape(1, 3, 2) + bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype) + self._testGradient(np_input, bias, dtype, data_format, use_gpu) + @test_util.run_deprecated_v1 def testGradientTensor4D(self): - # BiasAddGrad with NCHW support 4D so all are enabled. for (data_format, use_gpu) in [("NHWC", False), ("NHWC", True), ("NCHW", False), ("NCHW", True)]: for dtype in (dtypes.float16, dtypes.float32, dtypes.float64): @@ -219,6 +227,17 @@ class BiasAddTest(test.TestCase): bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype) self._testGradient(np_input, bias, dtype, data_format, use_gpu) + @test_util.run_deprecated_v1 + def testGradientTensor5D(self): + for (data_format, use_gpu) in [("NHWC", False), ("NHWC", True), + ("NCHW", False), ("NCHW", True)]: + for dtype in (dtypes.float16, dtypes.float32, dtypes.float64): + np_input = np.arange( + 1.0, 49.0, dtype=dtype.as_numpy_dtype).reshape( + [1, 2, 3, 4, 2]).astype(np.float32) + bias = np.array([1.3, 2.4], dtype=dtype.as_numpy_dtype) + self._testGradient(np_input, bias, dtype, data_format, use_gpu) + @test_util.run_deprecated_v1 def testEmpty(self): np.random.seed(7) @@ -227,10 +246,15 @@ class BiasAddTest(test.TestCase): @test_util.run_deprecated_v1 def testEmptyGradient(self): - # TODO(yongtang): 
BiasAddGrad with NCHW only works 4D. Reenable once - # all dimensions are supported. for (data_format, use_gpu) in ("NHWC", False), ("NHWC", True): - for shape in (0, 0), (2, 0), (0, 2), (4, 3, 0), (4, 0, 3), (0, 4, 3): + for shape in (0, 0), (2, 0), (0, 2): + self._testGradient( + np.random.randn(*shape), + np.random.randn(shape[-1]), dtypes.float64, data_format, use_gpu) + + for (data_format, use_gpu) in [("NHWC", False), ("NHWC", True), + ("NCHW", False), ("NCHW", True)]: + for shape in (4, 3, 0), (4, 0, 3), (0, 4, 3): self._testGradient( np.random.randn(*shape), np.random.randn(shape[-1]), dtypes.float64, data_format, use_gpu) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 34404edc9a..7131e4abc4 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -314,10 +314,10 @@ def _BiasAddGradGrad(op, received_grad): if data_format == b"NCHW": expanded_shape = array_ops.concat([ - array_ops.ones_like(shape[:-3]), bias_shape, - array_ops.ones_like(shape[-2:]) + array_ops.ones_like(shape[:1]), bias_shape, + array_ops.ones_like(shape[2:]) ], 0) - tile_mults = array_ops.concat([shape[:-3], [1], shape[-2:]], 0) + tile_mults = array_ops.concat([shape[:1], [1], shape[2:]], 0) else: expanded_shape = array_ops.concat( [array_ops.ones_like(shape[:-1]), bias_shape], 0) -- GitLab From 96f41397357f96c41459cf351f8853caa8d724c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Geron?= Date: Fri, 28 Dec 2018 15:18:06 +0800 Subject: [PATCH 0062/1095] Fix typo in the documentation of tf.function "`add_noise()` will return a different output every time it is invoked. However, `add_noise` will return the same value every time it is called..." 
=> the second `add_noise` should be `traced` --- tensorflow/python/eager/def_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/eager/def_function.py b/tensorflow/python/eager/def_function.py index ebc47d1566..b25df5a47c 100644 --- a/tensorflow/python/eager/def_function.py +++ b/tensorflow/python/eager/def_function.py @@ -765,7 +765,7 @@ def function(func=None, ``` `add_noise()` will return a different output every time it is invoked. - However, `add_noise` will return the same value every time it is called, + However, `traced()` will return the same value every time it is called, since a particular random value generated by the `np.random.randn` call will be inserted in the traced/staged TensorFlow graph as a constant. In this particular example, replacing `np.random.randn(5, 5)` with -- GitLab From 844ad0da4bb38192f8ef17a631c0f3a37b85ce5f Mon Sep 17 00:00:00 2001 From: mdfaijul Date: Fri, 28 Dec 2018 10:23:33 -0800 Subject: [PATCH 0063/1095] fix for conv_ops_test --- tensorflow/core/graph/mkl_layout_pass.cc | 18 +++ tensorflow/core/kernels/conv_ops_test.cc | 134 ++++++++++++----------- 2 files changed, 90 insertions(+), 62 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 9495132f4a..41898eadf9 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -843,6 +843,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass { CHECK_NOTNULL(m); Node* n = nullptr; + DataType T_m; + TF_CHECK_OK(GetNodeAttr(m->def(), "T", &T_m)); + + // Don't try to merge if datatype is not DT_FLOAT + if (T_m != DT_FLOAT) return n; + if (m->type_string() == csinfo_.bias_add) { // If a is BiasAdd, then Conv2D is 0th input of BiasAdd. 
TF_CHECK_OK(m->input_node(0, &n)); @@ -877,6 +883,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass { DCHECK(m); Node* n = nullptr; + DataType T_m; + TF_CHECK_OK(GetNodeAttr(m->def(), "T", &T_m)); + + // Don't try to merge if datatype is not DT_FLOAT + if (T_m != DT_FLOAT) return n; + const Node* conv_node; if (m->type_string() == csinfo_.pad) { // If m is Pad, then Conv2D is the output of Pad. @@ -935,6 +947,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass { CHECK_NOTNULL(m); Node* n = nullptr; + DataType T_m; + TF_CHECK_OK(GetNodeAttr(m->def(), "T", &T_m)); + + // Don't try to merge if datatype is not DT_FLOAT + if (T_m != DT_FLOAT) return n; + if (m->type_string() == csinfo_.bias_add_grad) { // Get 1st input 'g' of BiasAddGrad. Node* g = nullptr; diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc index ae4132bb0a..d144576249 100644 --- a/tensorflow/core/kernels/conv_ops_test.cc +++ b/tensorflow/core/kernels/conv_ops_test.cc @@ -770,7 +770,15 @@ class FusedConv2DOpTest : public OpsTestBase { ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype()); ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape()); - test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6); + // NOTE(intel-tf): When filter_size is equal to the input image size, + // conv2d essentially is element-wise multiplication followed by + // a full sum reduction, which causes larger numerical error + // than usual cases. 
+ if (image_width == filter_size && image_height == filter_size) { + test::ExpectTensorNear(conv_2d, fused_conv_2d, 1e-4); + } else { + test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6); + } } void VerifyFusedBatchNormTensorsNear(int depth, int image_width, @@ -812,7 +820,15 @@ class FusedConv2DOpTest : public OpsTestBase { ASSERT_EQ(conv_2d.dtype(), fused_conv_2d.dtype()); ASSERT_EQ(conv_2d.shape(), fused_conv_2d.shape()); - test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6); + // NOTE(intel-tf): When filter_size is equal to the input image size, + // conv2d essentially is element-wise multiplication followed by + // a full sum reduction, which causes larger numerical error + // than usual cases. + if (image_width == filter_size && image_height == filter_size) { + test::ExpectTensorNear(conv_2d, fused_conv_2d, 1e-4); + } else { + test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6); + } } // Verifies that computing Conv2D+BiasAdd in a graph is identical to @@ -821,16 +837,15 @@ class FusedConv2DOpTest : public OpsTestBase { int depth = kDepth, int image_width = kImageWidth, int image_height = kImageHeight, int image_batch_count = kImageBatchCount) { - const BiasAddGraphRunner run_default = - [this](const Tensor& input_data, const Tensor& filter_data, - const Tensor& bias_data, Tensor* out) { - RunConv2DWithBias(input_data, filter_data, bias_data, out); - }; - - const BiasAddGraphRunner run_fused = [this](const Tensor& input_data, - const Tensor& filter_data, - const Tensor& bias_data, - Tensor* out) { + const BiasAddGraphRunner run_default = [this]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& bias_data, Tensor* out) { + RunConv2DWithBias(input_data, filter_data, bias_data, out); + }; + + const BiasAddGraphRunner run_fused = [this]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& bias_data, Tensor* out) { RunFusedConv2DOp(input_data, filter_data, {bias_data}, {"BiasAdd"}, out); }; @@ -846,19 +861,19 
@@ class FusedConv2DOpTest : public OpsTestBase { int image_width = kImageWidth, int image_height = kImageHeight, int image_batch_count = kImageBatchCount) { - const BiasAddGraphRunner run_default = - [this](const Tensor& input_data, const Tensor& filter_data, - const Tensor& bias_data, Tensor* out) { - RunConv2DWithBiasAndRelu(input_data, filter_data, bias_data, out, - /*allow_gpu_device=*/true); - }; - - const BiasAddGraphRunner run_fused = - [this](const Tensor& input_data, const Tensor& filter_data, - const Tensor& bias_data, Tensor* out) { - RunFusedConv2DOp(input_data, filter_data, {bias_data}, - {"BiasAdd", "Relu"}, out, /*allow_gpu_device=*/true); - }; + const BiasAddGraphRunner run_default = [this]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& bias_data, Tensor* out) { + RunConv2DWithBiasAndRelu(input_data, filter_data, bias_data, out, + /*allow_gpu_device=*/true); + }; + + const BiasAddGraphRunner run_fused = [this]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& bias_data, Tensor* out) { + RunFusedConv2DOp(input_data, filter_data, {bias_data}, + {"BiasAdd", "Relu"}, out, /*allow_gpu_device=*/true); + }; VerifyBiasAddTensorsNear(depth, image_width, image_height, image_batch_count, filter_size, filter_count, @@ -872,24 +887,22 @@ class FusedConv2DOpTest : public OpsTestBase { int image_width = kImageWidth, int image_height = kImageHeight, int image_batch_count = kImageBatchCount) { - const BatchNormGraphRunner run_default = - [this](const Tensor& input_data, const Tensor& filter_data, - const Tensor& scale_data, const Tensor& offset_data, - const Tensor& mean_data, const Tensor& variance_data, - Tensor* out) { - RunConv2DWithBatchNorm(input_data, filter_data, scale_data, - offset_data, mean_data, variance_data, out); - }; - - const BatchNormGraphRunner run_fused = - [this](const Tensor& input_data, const Tensor& filter_data, - const Tensor& scale_data, const Tensor& offset_data, - const Tensor& mean_data, 
const Tensor& variance_data, - Tensor* out) { - RunFusedConv2DOp(input_data, filter_data, - {scale_data, offset_data, mean_data, variance_data}, - {"FusedBatchNorm"}, out); - }; + const BatchNormGraphRunner run_default = [this]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& scale_data, const Tensor& offset_data, + const Tensor& mean_data, const Tensor& variance_data, Tensor* out) { + RunConv2DWithBatchNorm(input_data, filter_data, scale_data, offset_data, + mean_data, variance_data, out); + }; + + const BatchNormGraphRunner run_fused = [this]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& scale_data, const Tensor& offset_data, + const Tensor& mean_data, const Tensor& variance_data, Tensor* out) { + RunFusedConv2DOp(input_data, filter_data, + {scale_data, offset_data, mean_data, variance_data}, + {"FusedBatchNorm"}, out); + }; VerifyFusedBatchNormTensorsNear(depth, image_width, image_height, image_batch_count, filter_size, @@ -902,25 +915,22 @@ class FusedConv2DOpTest : public OpsTestBase { int filter_size, int filter_count, int depth = kDepth, int image_width = kImageWidth, int image_height = kImageHeight, int image_batch_count = kImageBatchCount) { - const BatchNormGraphRunner run_default = - [this](const Tensor& input_data, const Tensor& filter_data, - const Tensor& scale_data, const Tensor& offset_data, - const Tensor& mean_data, const Tensor& variance_data, - Tensor* out) { - RunConv2DWithBatchNormAndRelu(input_data, filter_data, scale_data, - offset_data, mean_data, variance_data, - out); - }; - - const BatchNormGraphRunner run_fused = - [this](const Tensor& input_data, const Tensor& filter_data, - const Tensor& scale_data, const Tensor& offset_data, - const Tensor& mean_data, const Tensor& variance_data, - Tensor* out) { - RunFusedConv2DOp(input_data, filter_data, - {scale_data, offset_data, mean_data, variance_data}, - {"FusedBatchNorm", "Relu"}, out); - }; + const BatchNormGraphRunner run_default = [this]( + 
const Tensor& input_data, const Tensor& filter_data, + const Tensor& scale_data, const Tensor& offset_data, + const Tensor& mean_data, const Tensor& variance_data, Tensor* out) { + RunConv2DWithBatchNormAndRelu(input_data, filter_data, scale_data, + offset_data, mean_data, variance_data, out); + }; + + const BatchNormGraphRunner run_fused = [this]( + const Tensor& input_data, const Tensor& filter_data, + const Tensor& scale_data, const Tensor& offset_data, + const Tensor& mean_data, const Tensor& variance_data, Tensor* out) { + RunFusedConv2DOp(input_data, filter_data, + {scale_data, offset_data, mean_data, variance_data}, + {"FusedBatchNorm", "Relu"}, out); + }; VerifyFusedBatchNormTensorsNear(depth, image_width, image_height, image_batch_count, filter_size, -- GitLab From 00fd6acd0b80cbc2be73e8b89cfebc01cb4cf9e8 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 28 Dec 2018 12:11:49 -0800 Subject: [PATCH 0064/1095] Disable optimizer_v2_test everywhere. PiperOrigin-RevId: 227157901 --- tensorflow/python/keras/optimizer_v2/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/keras/optimizer_v2/BUILD b/tensorflow/python/keras/optimizer_v2/BUILD index b6be6a90af..05f6e5ac48 100644 --- a/tensorflow/python/keras/optimizer_v2/BUILD +++ b/tensorflow/python/keras/optimizer_v2/BUILD @@ -176,7 +176,11 @@ py_test( srcs = ["optimizer_v2_test.py"], shard_count = 8, tags = [ + "manual", + "no_gpu", + "no_oss", "no_windows", + "notap", ], deps = [ ":optimizer_v2", -- GitLab From cd2f18ff27fc2a9b502c80bf1d4a0abfc3577460 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Fri, 28 Dec 2018 13:34:54 -0800 Subject: [PATCH 0065/1095] [XLA] Add parser, proto and verifier support for DS/DUS with scalar indices. Since this form is not yet documented, supported by HLO passes, or any backends, the verifier will only accept it under a flag. 
PiperOrigin-RevId: 227165366 --- tensorflow/compiler/xla/client/xla_builder.cc | 8 +- tensorflow/compiler/xla/service/BUILD | 19 +- .../xla/service/hlo_creation_utils.cc | 4 +- .../xla/service/hlo_evaluator_typed_visitor.h | 14 +- .../compiler/xla/service/hlo_instruction.cc | 69 ++++++- .../compiler/xla/service/hlo_instruction.h | 9 + .../compiler/xla/service/hlo_instructions.cc | 34 +++- .../compiler/xla/service/hlo_instructions.h | 28 ++- tensorflow/compiler/xla/service/hlo_parser.cc | 27 ++- .../compiler/xla/service/hlo_parser_test.cc | 33 ++- .../compiler/xla/service/hlo_verifier.cc | 39 +++- .../compiler/xla/service/hlo_verifier_test.cc | 51 +++++ .../compiler/xla/service/shape_inference.cc | 190 +++++++++++++----- .../compiler/xla/service/shape_inference.h | 7 +- tensorflow/compiler/xla/xla.proto | 6 +- 15 files changed, 433 insertions(+), 105 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 1c6fe0ab5d..4b752def1a 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -691,7 +691,7 @@ XlaOp XlaBuilder::DynamicSlice(const XlaOp& operand, const XlaOp& start_indices, GetShape(start_indices)); TF_ASSIGN_OR_RETURN(Shape shape, ShapeInference::InferDynamicSliceShape( - operand_shape, start_indices_shape, slice_sizes)); + operand_shape, {start_indices_shape}, slice_sizes)); *instr.mutable_shape() = shape.ToProto(); for (int64 size : slice_sizes) { @@ -712,9 +712,9 @@ XlaOp XlaBuilder::DynamicUpdateSlice(const XlaOp& operand, const XlaOp& update, TF_ASSIGN_OR_RETURN(const Shape& update_shape, GetShape(update)); TF_ASSIGN_OR_RETURN(const Shape& start_indices_shape, GetShape(start_indices)); - TF_ASSIGN_OR_RETURN(Shape shape, - ShapeInference::InferDynamicUpdateSliceShape( - operand_shape, update_shape, start_indices_shape)); + TF_ASSIGN_OR_RETURN( + Shape shape, ShapeInference::InferDynamicUpdateSliceShape( + operand_shape, 
update_shape, {start_indices_shape})); *instr.mutable_shape() = shape.ToProto(); return AddInstruction(std::move(instr), HloOpcode::kDynamicUpdateSlice, diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index d8736c8196..201646e70d 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1,6 +1,14 @@ # Description: # XLA service implementation. +load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test") +load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library") +load( + "//tensorflow/core:platform/default/build_config.bzl", + "tf_proto_library_py", +) +load("//tensorflow:tensorflow.bzl", "tf_cc_binary", "tf_cc_test") + licenses(["notice"]) # Apache 2.0 package(default_visibility = [":friends"]) @@ -12,15 +20,6 @@ package_group( ], ) -load("//tensorflow/compiler/xla/tests:build_defs.bzl", "xla_test") -load("//tensorflow/compiler/xla:xla.bzl", "xla_proto_library") -load("//tensorflow:tensorflow.bzl", "tf_cc_test") -load("//tensorflow:tensorflow.bzl", "tf_cc_binary") -load( - "//tensorflow/core:platform/default/build_config.bzl", - "tf_proto_library_py", -) - xla_proto_library( name = "hlo_proto", srcs = ["hlo.proto"], @@ -2592,6 +2591,7 @@ tf_cc_test( srcs = ["hlo_verifier_test.cc"], deps = [ ":hlo", + ":hlo_module_config", ":hlo_parser", ":hlo_verifier", ":layout_assignment", @@ -2599,6 +2599,7 @@ tf_cc_test( "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:types", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla:xla_proto", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:test", diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index 1678fba172..8cea95a73e 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ 
-108,7 +108,7 @@ StatusOr MakeDynamicSliceHlo( TF_ASSIGN_OR_RETURN( Shape dynamic_slice_shape, ShapeInference::InferDynamicSliceShape( - operand->shape(), start_indices->shape(), slice_sizes)); + operand->shape(), {start_indices->shape()}, slice_sizes)); return computation->AddInstruction(HloInstruction::CreateDynamicSlice( dynamic_slice_shape, operand, start_indices, slice_sizes)); } @@ -122,7 +122,7 @@ StatusOr MakeDynamicUpdateSliceHlo( TF_ASSIGN_OR_RETURN( Shape dynamic_update_slice_shape, ShapeInference::InferDynamicUpdateSliceShape( - operand->shape(), update->shape(), start_indices->shape())); + operand->shape(), update->shape(), {start_indices->shape()})); return computation->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( dynamic_update_slice_shape, operand, update, start_indices)); } diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index 783f87ea64..70ddedf62b 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -1406,10 +1406,12 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { auto operand = dynamic_slice->operand(0); auto start_indices = dynamic_slice->operand(1); auto result_shape = dynamic_slice->shape(); - TF_ASSIGN_OR_RETURN(auto inferred_return_shape, - ShapeInference::InferDynamicSliceShape( - operand->shape(), start_indices->shape(), - dynamic_slice->dynamic_slice_sizes())); + TF_ASSIGN_OR_RETURN( + auto inferred_return_shape, + ShapeInference::InferDynamicSliceShape( + operand->shape(), + Cast(dynamic_slice)->index_shapes(), + dynamic_slice->dynamic_slice_sizes())); TF_RET_CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) << "return shape is set to: " << ShapeUtil::HumanString(result_shape) << " but is inferred to be: " @@ -1464,7 +1466,9 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { 
TF_ASSIGN_OR_RETURN( auto inferred_return_shape, ShapeInference::InferDynamicUpdateSliceShape( - operand->shape(), update->shape(), start_indices->shape())); + operand->shape(), update->shape(), + Cast(dynamic_update_slice) + ->index_shapes())); TF_RET_CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) << "return shape is set to: " << ShapeUtil::HumanString(result_shape) << " but is inferred to be: " diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 2915535987..543fb66c01 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -31,6 +31,7 @@ limitations under the License. #include "absl/strings/numbers.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" +#include "absl/types/span.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/protobuf_util.h" @@ -452,13 +453,50 @@ StatusOr> HloInstruction::CreateFromProto( CreatePad(shape, operands(0), operands(1), proto.padding_config()); break; case HloOpcode::kDynamicSlice: { - TF_RET_CHECK(proto.operand_ids_size() == 2) - << "DynamicSlice instruction should have 2 operands but sees " - << proto.operand_ids_size(); std::vector slice_sizes(proto.dynamic_slice_sizes_size()); absl::c_copy(proto.dynamic_slice_sizes(), slice_sizes.begin()); - instruction = - CreateDynamicSlice(shape, operands(0), operands(1), slice_sizes); + TF_RET_CHECK(proto.operand_ids_size() >= 1) + << "DynamicSlice instruction should have at least 1 operands but " + "sees " + << proto.operand_ids_size(); + if (proto.operand_ids_size() == 2 && operands(1)->shape().rank() == 1) { + // TODO(b/118437727): Old form, remove this path. 
+ instruction = + CreateDynamicSlice(shape, operands(0), operands(1), slice_sizes); + } else { + // New form + auto expected_operands = 1 + operands(0)->shape().rank(); + TF_RET_CHECK(proto.operand_ids_size() == expected_operands) + << "DynamicSlice instruction should have " << expected_operands + << " operands, but has " << proto.operand_ids_size(); + const auto& operand_vector = all_operands(); + instruction = CreateDynamicSlice( + shape, operands(0), absl::MakeSpan(operand_vector).subspan(1), + slice_sizes); + } + break; + } + case HloOpcode::kDynamicUpdateSlice: { + TF_RET_CHECK(proto.operand_ids_size() >= 2) + << "DynamicUpdateSlice instruction should have at least 2 operands " + "but sees " + << proto.operand_ids_size(); + if (proto.operand_ids_size() == 3 && operands(2)->shape().rank() == 1) { + // TODO(b/118437727): Old form, remove this path. + instruction = CreateDynamicUpdateSlice(shape, operands(0), operands(1), + operands(2)); + } else { + // New form + auto expected_operands = 2 + operands(0)->shape().rank(); + TF_RET_CHECK(proto.operand_ids_size() == expected_operands) + << "DynamicUpdateSlice instruction should have " + << expected_operands << " operands, but has " + << proto.operand_ids_size(); + const auto& operand_vector = all_operands(); + instruction = + CreateDynamicUpdateSlice(shape, operands(0), operands(1), + absl::MakeSpan(operand_vector).subspan(2)); + } break; } case HloOpcode::kGather: { @@ -917,6 +955,14 @@ HloInstruction::CreateAddDependency(HloInstruction* data_operand, shape, operand, start_indices, slice_sizes); } +/* static */ std::unique_ptr HloInstruction::CreateDynamicSlice( + const Shape& shape, HloInstruction* operand, + absl::Span start_indices, + absl::Span slice_sizes) { + return absl::make_unique( + shape, operand, start_indices, slice_sizes); +} + /* static */ std::unique_ptr HloInstruction::CreateDynamicUpdateSlice(const Shape& shape, HloInstruction* operand, @@ -926,6 +972,14 @@ 
HloInstruction::CreateDynamicUpdateSlice(const Shape& shape, shape, operand, update, start_indices); } +/* static */ std::unique_ptr +HloInstruction::CreateDynamicUpdateSlice( + const Shape& shape, HloInstruction* operand, HloInstruction* update, + absl::Span start_indices) { + return absl::make_unique( + shape, operand, update, start_indices); +} + /* static */ std::unique_ptr HloInstruction::CreateConcatenate( const Shape& shape, absl::Span operands, int64 dimension) { @@ -1382,9 +1436,8 @@ std::unique_ptr HloInstruction::CloneWithNewOperands( clone = CreateReshape(shape, new_operands[0]); break; case HloOpcode::kDynamicUpdateSlice: - CHECK_EQ(new_operands.size(), 3); clone = CreateDynamicUpdateSlice(shape, new_operands[0], new_operands[1], - new_operands[2]); + new_operands.subspan(2)); break; case HloOpcode::kTuple: clone = CreateTuple(new_operands); @@ -2824,7 +2877,7 @@ HloInstruction::UseKind HloInstruction::OperandElementUse(int64 i) const { } return UseKind::kReuse; case HloOpcode::kDynamicUpdateSlice: - // Dynamic-update-slice reuses only operand 2 (start_indices). + // Dynamic-update-slice reuses only start_indices. if (i == 0 || i == 1) { return UseKind::kUse; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 36e1ab4931..362d07e64f 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -559,12 +559,21 @@ class HloInstruction { static std::unique_ptr CreateDynamicSlice( const Shape& shape, HloInstruction* operand, HloInstruction* start_indices, absl::Span slice_sizes); + // Same as above, but expects a span of scalar start indices. + static std::unique_ptr CreateDynamicSlice( + const Shape& shape, HloInstruction* operand, + absl::Span start_indices, + absl::Span slice_sizes); // Creates a dynamic update slice instruction, which updates a slice // of 'operand' with 'update' and 'start_indices'. 
static std::unique_ptr CreateDynamicUpdateSlice( const Shape& shape, HloInstruction* operand, HloInstruction* update, HloInstruction* start_indices); + // Same as above, but expects a span of scalar start indices. + static std::unique_ptr CreateDynamicUpdateSlice( + const Shape& shape, HloInstruction* operand, HloInstruction* update, + absl::Span start_indices); // Creates a concatenate instruction, where the operands are concatenated on // the provided dimension. diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 1f37b284a2..7a3fb6d9a2 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -2007,6 +2007,18 @@ HloDynamicSliceInstruction::HloDynamicSliceInstruction( AppendOperand(start_indices); } +HloDynamicSliceInstruction::HloDynamicSliceInstruction( + const Shape& shape, HloInstruction* operand, + absl::Span start_indices, + absl::Span slice_sizes) + : HloDynamicIndexInstruction(HloOpcode::kDynamicSlice, shape), + dynamic_slice_sizes_(slice_sizes.begin(), slice_sizes.end()) { + AppendOperand(operand); + for (HloInstruction* index : start_indices) { + AppendOperand(index); + } +} + HloDynamicUpdateSliceInstruction::HloDynamicUpdateSliceInstruction( const Shape& shape, HloInstruction* operand, HloInstruction* update, HloInstruction* start_indices) @@ -2016,6 +2028,17 @@ HloDynamicUpdateSliceInstruction::HloDynamicUpdateSliceInstruction( AppendOperand(start_indices); } +HloDynamicUpdateSliceInstruction::HloDynamicUpdateSliceInstruction( + const Shape& shape, HloInstruction* operand, HloInstruction* update, + absl::Span start_indices) + : HloDynamicIndexInstruction(HloOpcode::kDynamicUpdateSlice, shape) { + AppendOperand(operand); + AppendOperand(update); + for (HloInstruction* index : start_indices) { + AppendOperand(index); + } +} + HloInstructionProto HloDynamicSliceInstruction::ToProto() const { HloInstructionProto 
proto = HloInstruction::ToProto(); for (int64 slice_size : dynamic_slice_sizes_) { @@ -2041,9 +2064,14 @@ std::unique_ptr HloDynamicSliceInstruction::CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* context) const { - CHECK_EQ(new_operands.size(), 2); - return absl::make_unique( - shape, new_operands[0], new_operands[1], dynamic_slice_sizes_); + if (new_operands.size() == 2 && new_operands[1]->shape().rank() == 1) { + // TODO(b/118437727): Old form, remove this path. + return absl::make_unique( + shape, new_operands[0], new_operands[1], dynamic_slice_sizes_); + } else { + return absl::make_unique( + shape, new_operands[0], new_operands.subspan(1), dynamic_slice_sizes_); + } } HloGatherInstruction::HloGatherInstruction( diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index ca212c7f2c..83b6760683 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -1183,7 +1183,22 @@ class HloDynamicIndexInstruction : public HloInstruction { public: explicit HloDynamicIndexInstruction(HloOpcode opcode, const Shape& shape) : HloInstruction(opcode, shape) {} - virtual int64 index_operand_number() const = 0; + virtual int64 first_index_operand_number() const = 0; + + // Returns a subspan of operands which represent the start indices. + absl::Span index_operands() { + return absl::MakeSpan(operands()).subspan(first_index_operand_number()); + } + + // Returns the shapes of the index operands. 
+ std::vector index_shapes() { + std::vector shapes; + auto indices = index_operands(); + for (const HloInstruction* index : indices) { + shapes.push_back(index->shape()); + } + return shapes; + } }; class HloDynamicSliceInstruction : public HloDynamicIndexInstruction { @@ -1192,6 +1207,10 @@ class HloDynamicSliceInstruction : public HloDynamicIndexInstruction { HloInstruction* operand, HloInstruction* start_indices, absl::Span slice_sizes); + explicit HloDynamicSliceInstruction( + const Shape& shape, HloInstruction* operand, + absl::Span start_indices, + absl::Span slice_sizes); // Old methods kept for smooth subclassing transition END. // Returns the size of the slice in the given dimension for a dynamic // slice node. @@ -1204,7 +1223,7 @@ class HloDynamicSliceInstruction : public HloDynamicIndexInstruction { // Returns a serialized representation of this instruction. HloInstructionProto ToProto() const override; - int64 index_operand_number() const override { return 1; } + int64 first_index_operand_number() const override { return 1; } private: std::vector ExtraAttributesToStringImpl( @@ -1229,8 +1248,11 @@ class HloDynamicUpdateSliceInstruction : public HloDynamicIndexInstruction { HloInstruction* operand, HloInstruction* update, HloInstruction* start_indices); + explicit HloDynamicUpdateSliceInstruction( + const Shape& shape, HloInstruction* operand, HloInstruction* update, + absl::Span start_indices); - int64 index_operand_number() const override { return 2; } + int64 first_index_operand_number() const override { return 2; } }; class HloGatherInstruction : public HloInstruction { diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 56848ce0e8..730d39500b 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -1171,24 +1171,39 @@ bool HloParser::ParseInstructionRhs(HloComputation::Builder* builder, optional> dynamic_slice_sizes; 
attrs["dynamic_slice_sizes"] = { /*required=*/true, AttrTy::kBracedInt64List, &dynamic_slice_sizes}; - if (!ParseOperands(&operands, /*expected_size=*/2) || - !ParseAttributes(attrs)) { + LocTy loc = lexer_.GetLoc(); + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } + if (operands.empty()) { + return Error(loc, "Expected at least one operand."); + } + if (!(operands.size() == 2 && operands[1]->shape().rank() == 1) && + operands.size() != 1 + operands[0]->shape().rank()) { + return Error(loc, "Wrong number of operands."); + } instruction = builder->AddInstruction(HloInstruction::CreateDynamicSlice( - shape, /*operand=*/operands[0], /*start_indices=*/operands[1], + shape, /*operand=*/operands[0], + /*start_indices=*/absl::MakeSpan(operands).subspan(1), *dynamic_slice_sizes)); break; } case HloOpcode::kDynamicUpdateSlice: { - if (!ParseOperands(&operands, /*expected_size=*/3) || - !ParseAttributes(attrs)) { + LocTy loc = lexer_.GetLoc(); + if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } + if (operands.size() < 2) { + return Error(loc, "Expected at least two operands."); + } + if (!(operands.size() == 3 && operands[2]->shape().rank() == 1) && + operands.size() != 2 + operands[0]->shape().rank()) { + return Error(loc, "Wrong number of operands."); + } instruction = builder->AddInstruction(HloInstruction::CreateDynamicUpdateSlice( shape, /*operand=*/operands[0], /*update=*/operands[1], - /*start_indices=*/operands[2])); + /*start_indices=*/absl::MakeSpan(operands).subspan(2))); break; } case HloOpcode::kTranspose: { diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index bc1a736766..76b8a5bc11 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -566,12 +566,26 @@ ENTRY %DynamicSlice.v5 (original_parameter: s32[2,2,258], start_index: s32[1]) - ROOT %dynamic-slice = 
s32[2,2,258]{2,1,0} dynamic-slice(s32[2,2,258]{2,1,0} %original_parameter, s32[3]{0} %concatenate), dynamic_slice_sizes={2,2,258} } +)" +}, +// Dynamic slice with scalar indices +{ +"DynamicSliceScalarIndices", +R"(HloModule DynamicSlice_module + +ENTRY %DynamicSlice.v5 (original_parameter: s32[2,2,258], start_index: s32[]) -> s32[2,2,258] { + %original_parameter = s32[2,2,258]{2,1,0} parameter(0) + %constant = s32[] constant(0) + %start_index = s32[] parameter(1) + ROOT %dynamic-slice = s32[2,2,258]{2,1,0} dynamic-slice(s32[2,2,258]{2,1,0} %original_parameter, s32[] %constant, s32[] %constant, s32[] %start_index), dynamic_slice_sizes={2,2,258} +} + )" }, // Dynamic update slice { "DynamicUpdateSlice", -R"(HloModule DynamicUpdateSlice_module +R"(HloModule DynamicSlice_module ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_indices: s32[4]) -> s32[1,1,25,1] { %input = s32[1,1,25,1]{3,2,1,0} parameter(0) @@ -580,6 +594,23 @@ ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_ ROOT %dynamic-update-slice = s32[1,1,25,1]{3,2,1,0} dynamic-update-slice(s32[1,1,25,1]{3,2,1,0} %input, s32[1,1,2,1]{3,2,1,0} %update, s32[4]{0} %start_indices) } +)" +}, +// Dynamic update slice with scalar indices +{ +"DynamicUpdateSliceScalarIndex", +R"(HloModule DynamicUpdateSlice_module + +ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_index.0: s32[], start_index.1: s32[], start_index.2: s32[], start_index.3: s32[]) -> s32[1,1,25,1] { + %input = s32[1,1,25,1]{3,2,1,0} parameter(0) + %update = s32[1,1,2,1]{3,2,1,0} parameter(1) + %start_index.0 = s32[] parameter(2) + %start_index.1 = s32[] parameter(3) + %start_index.2 = s32[] parameter(4) + %start_index.3 = s32[] parameter(5) + ROOT %dynamic-update-slice = s32[1,1,25,1]{3,2,1,0} dynamic-update-slice(s32[1,1,25,1]{3,2,1,0} %input, s32[1,1,2,1]{3,2,1,0} %update, s32[] %start_index.0, s32[] %start_index.1, s32[] %start_index.2, s32[] %start_index.3) +} 
+ )" }, // batch norm training diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 8d8720d7be..14e29533c2 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -496,21 +496,38 @@ Status ShapeVerifier::HandleSlice(HloInstruction* slice) { } Status ShapeVerifier::HandleDynamicSlice(HloInstruction* dynamic_slice) { - TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_slice, 2)); - return CheckShape(dynamic_slice, ShapeInference::InferDynamicSliceShape( - dynamic_slice->operand(0)->shape(), - dynamic_slice->operand(1)->shape(), - dynamic_slice->dynamic_slice_sizes())); + const DebugOptions& debug_options = + dynamic_slice->GetModule()->config().debug_options(); + const bool allow_scalar_indices = + debug_options.xla_allow_scalar_index_dynamic_ops(); + if (!allow_scalar_indices) { + TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_slice, 2)); + } + return CheckShape( + dynamic_slice, + ShapeInference::InferDynamicSliceShape( + dynamic_slice->operand(0)->shape(), + Cast(dynamic_slice)->index_shapes(), + dynamic_slice->dynamic_slice_sizes(), allow_scalar_indices)); } Status ShapeVerifier::HandleDynamicUpdateSlice( HloInstruction* dynamic_update_slice) { - TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_update_slice, 3)); - return CheckShape(dynamic_update_slice, - ShapeInference::InferDynamicUpdateSliceShape( - dynamic_update_slice->operand(0)->shape(), - dynamic_update_slice->operand(1)->shape(), - dynamic_update_slice->operand(2)->shape())); + const DebugOptions& debug_options = + dynamic_update_slice->GetModule()->config().debug_options(); + const bool allow_scalar_indices = + debug_options.xla_allow_scalar_index_dynamic_ops(); + if (!allow_scalar_indices) { + TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_update_slice, 3)); + } + return CheckShape( + dynamic_update_slice, + ShapeInference::InferDynamicUpdateSliceShape( + 
dynamic_update_slice->operand(0)->shape(), + dynamic_update_slice->operand(1)->shape(), + Cast(dynamic_update_slice) + ->index_shapes(), + allow_scalar_indices)); } Status ShapeVerifier::HandleTuple(HloInstruction* tuple) { diff --git a/tensorflow/compiler/xla/service/hlo_verifier_test.cc b/tensorflow/compiler/xla/service/hlo_verifier_test.cc index 4bc557e4e6..91f247a9bb 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier_test.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_parser.h" #include "tensorflow/compiler/xla/service/layout_assignment.h" @@ -27,6 +28,7 @@ limitations under the License. #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla.pb.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -386,6 +388,55 @@ TEST_F(HloVerifierTest, AddWithLayoutChange) { ASSERT_TRUE(status.ok()); } +TEST_F(HloVerifierTest, ScalarIndexDynamicSlice) { + const char* const kScalarIndexDynamicSlice = R"( + HloModule DynamicSlice_module + + ENTRY %DynamicSlice.v5 (original_parameter: s32[2,2,258], start_index: s32[]) -> s32[2,2,258] { + %original_parameter = s32[2,2,258] parameter(0) + %constant = s32[] constant(0) + %start_index = s32[] parameter(1) + ROOT %dynamic-slice = s32[2,2,258] dynamic-slice(s32[2,2,258] %original_parameter, s32[] %constant, s32[] %constant, s32[] %start_index), dynamic_slice_sizes={2,2,258} + } + )"; + + HloModuleConfig config; + DebugOptions debug_options = config.debug_options(); + 
debug_options.set_xla_allow_scalar_index_dynamic_ops(true); + config.set_debug_options(debug_options); + + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseHloString(kScalarIndexDynamicSlice, config)); + auto status = verifier().Run(module.get()).status(); + ASSERT_TRUE(status.ok()); +} + +TEST_F(HloVerifierTest, ScalarIndexDynamicUpdateSlice) { + const char* const kScalarIndexDynamicSlice = R"( + HloModule DynamicUpdateSlice_module + + ENTRY %DynamicUpdateSlice.v4 (input: s32[1,1,25,1], update: s32[1,1,2,1], start_index.0: s32[], start_index.1: s32[], start_index.2: s32[], start_index.3: s32[]) -> s32[1,1,25,1] { + %input = s32[1,1,25,1]{3,2,1,0} parameter(0) + %update = s32[1,1,2,1]{3,2,1,0} parameter(1) + %start_index.0 = s32[] parameter(2) + %start_index.1 = s32[] parameter(3) + %start_index.2 = s32[] parameter(4) + %start_index.3 = s32[] parameter(5) + ROOT %dynamic-update-slice = s32[1,1,25,1]{3,2,1,0} dynamic-update-slice(s32[1,1,25,1]{3,2,1,0} %input, s32[1,1,2,1]{3,2,1,0} %update, s32[] %start_index.0, s32[] %start_index.1, s32[] %start_index.2, s32[] %start_index.3) + } + )"; + + HloModuleConfig config; + DebugOptions debug_options = config.debug_options(); + debug_options.set_xla_allow_scalar_index_dynamic_ops(true); + config.set_debug_options(debug_options); + + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseHloString(kScalarIndexDynamicSlice, config)); + auto status = verifier().Run(module.get()).status(); + ASSERT_TRUE(status.ok()); +} + TEST_F(HloVerifierTestLayoutSensitive, AddWithLayoutChangeNotAllowed) { TF_ASSERT_OK_AND_ASSIGN(auto module, ParseHloString(kAddWithLayoutChangeHlo)); auto status = verifier().Run(module.get()).status(); diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index b0e241d216..4a2dd09742 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -2087,35 +2087,81 @@ 
ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr ShapeInference::InferDynamicSliceShape( - const Shape& operand_shape, const Shape& start_indices_shape, - absl::Span slice_sizes) { + const Shape& operand_shape, absl::Span start_index_shapes, + absl::Span slice_sizes, bool allow_scalar_indices) { TF_RETURN_IF_ERROR(ExpectArray(operand_shape, "operand of dynamic slice")); - TF_RETURN_IF_ERROR( - ExpectArray(start_indices_shape, "start indices of dynamic slice")); + auto number_of_indices = start_index_shapes.size(); + // TODO(b/118437727): Remove this path. + if (!allow_scalar_indices || + (number_of_indices >= 1 && start_index_shapes[0].rank() == 1)) { + if (number_of_indices != 1) { + return InvalidArgument( + "Dynamic slice should have exactly 1 index operand, has %d.", + number_of_indices); + } - VLOG(2) << StrFormat( - "slicing shape %s at dynamic start_indices %s with slice_sizes={%s}", - ShapeUtil::HumanString(operand_shape), - ShapeUtil::HumanString(start_indices_shape), StrJoin(slice_sizes, ", ")); + const Shape& start_indices_shape = start_index_shapes[0]; + TF_RETURN_IF_ERROR( + ExpectArray(start_indices_shape, "start indices of dynamic slice")); - if (start_indices_shape.rank() != 1) { - return InvalidArgument( - "Dynamic slice start indices of rank %d must be rank1.", - start_indices_shape.rank()); - } + VLOG(2) << StrFormat( + "slicing shape %s at dynamic start_indices %s with slice_sizes={%s}", + ShapeUtil::HumanString(operand_shape), + ShapeUtil::HumanString(start_indices_shape), + StrJoin(slice_sizes, ", ")); - if (!ShapeUtil::ElementIsIntegral(start_indices_shape)) { - return InvalidArgument( - "Dynamic slice start indices must be of integral type."); - } + if (start_indices_shape.rank() != 1) { + return InvalidArgument( + "Dynamic slice start indices of rank %d must be rank1.", + start_indices_shape.rank()); + } - const int64 start_num_dims = start_indices_shape.dimensions(0); - if 
(operand_shape.rank() != start_num_dims) { - return InvalidArgument( - "Dynamic slice start number of dimensions %d (%s) must match rank " - "%d of slice input (%s).", - start_num_dims, ShapeUtil::HumanString(start_indices_shape), - operand_shape.rank(), ShapeUtil::HumanString(operand_shape)); + if (!ShapeUtil::ElementIsIntegral(start_indices_shape)) { + return InvalidArgument( + "Dynamic slice start indices must be of integral type."); + } + + const int64 start_num_dims = start_indices_shape.dimensions(0); + if (operand_shape.rank() != start_num_dims) { + return InvalidArgument( + "Dynamic slice start number of dimensions %d (%s) must match rank " + "%d of slice input (%s).", + start_num_dims, ShapeUtil::HumanString(start_indices_shape), + operand_shape.rank(), ShapeUtil::HumanString(operand_shape)); + } + } else { + VLOG(2) << StrFormat("slicing shape %s a with slice_sizes={%s}", + ShapeUtil::HumanString(operand_shape), + StrJoin(slice_sizes, ", ")); + + if (operand_shape.rank() != number_of_indices) { + return InvalidArgument( + "Dynamic slice start number of dimensions %d must match rank " + "%d of slice input (%s).", + number_of_indices, operand_shape.rank(), + ShapeUtil::HumanString(operand_shape)); + } + + if (number_of_indices > 0) { + const Shape& first_index_shape = start_index_shapes[0]; + if (!ShapeUtil::IsScalar(first_index_shape)) { + return InvalidArgument("Dynamic slice indices must be scalar, not %s.", + ShapeUtil::HumanString(first_index_shape)); + } + if (!ShapeUtil::ElementIsIntegral(first_index_shape)) { + return InvalidArgument( + "Dynamic slice start indices must be of integral type."); + } + for (const Shape& index_shape : start_index_shapes) { + if (!ShapeUtil::Equal(first_index_shape, index_shape)) { + return InvalidArgument( + "Dynamic slice start indices must all have the same shape, got " + "mismatching indices with shapes %s and %s.", + ShapeUtil::HumanString(first_index_shape), + ShapeUtil::HumanString(index_shape)); + } + } + } } if 
(slice_sizes.size() != operand_shape.rank()) { @@ -2144,39 +2190,85 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, /* static */ StatusOr ShapeInference::InferDynamicUpdateSliceShape( const Shape& operand_shape, const Shape& update_shape, - const Shape& start_indices_shape) { + absl::Span start_index_shapes, bool allow_scalar_indices) { TF_RETURN_IF_ERROR( ExpectArray(operand_shape, "operand of dynamic update slice")); TF_RETURN_IF_ERROR( ExpectArray(update_shape, "update of dynamic update slice")); - TF_RETURN_IF_ERROR(ExpectArray(start_indices_shape, - "start indices of dynamic update slice")); - VLOG(2) << StrFormat( - "updating slice of shape %s at dynamic start_indices %s with update " - "shape %s", - ShapeUtil::HumanString(operand_shape), - ShapeUtil::HumanString(start_indices_shape), - ShapeUtil::HumanString(update_shape)); + auto number_of_indices = start_index_shapes.size(); + // TODO(b/118437727): Remove this path. + if (!allow_scalar_indices || + (number_of_indices >= 1 && start_index_shapes[0].rank() == 1)) { + if (number_of_indices != 1) { + return InvalidArgument( + "Dynamic update slice should have exactly 1 index operand, has %d.", + number_of_indices); + } + const Shape& start_indices_shape = start_index_shapes[0]; + TF_RETURN_IF_ERROR(ExpectArray(start_indices_shape, + "start indices of dynamic update slice")); - if (start_indices_shape.rank() != 1) { - return InvalidArgument( - "Dynamic update slice start indices of rank %d must be rank1.", - start_indices_shape.rank()); - } + VLOG(2) << StrFormat( + "updating slice of shape %s at dynamic start_indices %s with update " + "shape %s", + ShapeUtil::HumanString(operand_shape), + ShapeUtil::HumanString(start_indices_shape), + ShapeUtil::HumanString(update_shape)); - if (!ShapeUtil::ElementIsIntegral(start_indices_shape)) { - return InvalidArgument( - "Dynamic update slice start indices must be of integral type."); - } + if (start_indices_shape.rank() != 1) { + return 
InvalidArgument( + "Dynamic update slice start indices of rank %d must be rank1.", + start_indices_shape.rank()); + } - const int64 start_num_dims = start_indices_shape.dimensions(0); - if (operand_shape.rank() != start_num_dims) { - return InvalidArgument( - "Dynamic update slice start number of dimensions %d (%s) must match " - "rank %d of slice input (%s).", - start_num_dims, ShapeUtil::HumanString(start_indices_shape), - operand_shape.rank(), ShapeUtil::HumanString(operand_shape)); + if (!ShapeUtil::ElementIsIntegral(start_indices_shape)) { + return InvalidArgument( + "Dynamic update slice start indices must be of integral type."); + } + + const int64 start_num_dims = start_indices_shape.dimensions(0); + if (operand_shape.rank() != start_num_dims) { + return InvalidArgument( + "Dynamic update slice start number of dimensions %d (%s) must match " + "rank %d of slice input (%s).", + start_num_dims, ShapeUtil::HumanString(start_indices_shape), + operand_shape.rank(), ShapeUtil::HumanString(operand_shape)); + } + } else { + VLOG(2) << StrFormat("updating slice of shape %s with update shape %s", + ShapeUtil::HumanString(operand_shape), + ShapeUtil::HumanString(update_shape)); + + if (operand_shape.rank() != number_of_indices) { + return InvalidArgument( + "Dynamic update slice start number of dimensions %d must match rank " + "%d of slice input (%s).", + number_of_indices, operand_shape.rank(), + ShapeUtil::HumanString(operand_shape)); + } + + if (number_of_indices > 0) { + const Shape& first_index_shape = start_index_shapes[0]; + if (!ShapeUtil::IsScalar(first_index_shape)) { + return InvalidArgument( + "Dynamic update slice indices must be scalar, not %s.", + ShapeUtil::HumanString(first_index_shape)); + } + if (!ShapeUtil::ElementIsIntegral(first_index_shape)) { + return InvalidArgument( + "Dynamic update slice start indices must be of integral type."); + } + for (const Shape& index_shape : start_index_shapes) { + if (!ShapeUtil::Equal(first_index_shape, 
index_shape)) { + return InvalidArgument( + "Dynamic update slice start indices must all have the same " + "shape, got mismatching indices with shapes %s and %s.", + ShapeUtil::HumanString(first_index_shape), + ShapeUtil::HumanString(index_shape)); + } + } + } } if (update_shape.rank() != operand_shape.rank()) { diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h index 1b8fd10d69..e440e364c8 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -176,14 +176,15 @@ class ShapeInference { // Infers the shape produced by a dynamic slice operation of size specified // in 'slice_sizes', with dynamic start indices shape 'start_indices_shape'. static StatusOr InferDynamicSliceShape( - const Shape& operand_shape, const Shape& start_indices_shape, - absl::Span slice_sizes); + const Shape& operand_shape, absl::Span start_index_shapes, + absl::Span slice_sizes, bool allow_scalar_indices = false); // Infers the shape produced by a dynamic update slice operation based // on the shape of operand and update. static StatusOr InferDynamicUpdateSliceShape( const Shape& operand_shape, const Shape& update_shape, - const Shape& start_indices_shape); + absl::Span start_index_shapes, + bool allow_scalar_indices = false); // Infers the shape produced by doing a compile-time-constant indexing into // the given input shape. This is essential for operations on tuples, because diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto index 0e8fa73f81..e2d7b6ef46 100644 --- a/tensorflow/compiler/xla/xla.proto +++ b/tensorflow/compiler/xla/xla.proto @@ -230,7 +230,11 @@ message DebugOptions { // Enable fast math with eigen in the HLO evaluator. bool xla_hlo_evaluator_use_fast_path = 106; - // Next id: 107 + // Temporary option to allow support for both the R1 and the scalar index + // versions of DynamicSlice and DynamicUpdateSlice. 
Only used for testing. + bool xla_allow_scalar_index_dynamic_ops = 107; + + // Next id: 108 // Extra options to pass to the compilation backend (e.g. LLVM); specific // interpretation of these values is left to the backend. -- GitLab From c2c069b9f42da12a825c940a1d19fba713577127 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Fri, 28 Dec 2018 13:40:47 -0800 Subject: [PATCH 0066/1095] [XLA] Implement scalar index form of DS/DUS on Evaluator. This form is not yet documented, unsupported by HLO passes, and only passes verification under a flag. PiperOrigin-RevId: 227165947 --- .../xla/service/hlo_evaluator_typed_visitor.h | 127 +++++++++++++----- .../compiler/xla/service/hlo_instructions.h | 4 +- 2 files changed, 95 insertions(+), 36 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index 70ddedf62b..6f3208be13 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -1406,12 +1406,22 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { auto operand = dynamic_slice->operand(0); auto start_indices = dynamic_slice->operand(1); auto result_shape = dynamic_slice->shape(); + // TODO(b/118437727): Remove all of this nonsense. + // We may get an instruction without a parent module. In this case, assume + // scalar indices are not allowed.
+ bool allow_scalar_index = false; + if (dynamic_slice->GetModule() != nullptr) { + allow_scalar_index = dynamic_slice->GetModule() + ->config() + .debug_options() + .xla_allow_scalar_index_dynamic_ops(); + } TF_ASSIGN_OR_RETURN( auto inferred_return_shape, ShapeInference::InferDynamicSliceShape( operand->shape(), Cast(dynamic_slice)->index_shapes(), - dynamic_slice->dynamic_slice_sizes())); + dynamic_slice->dynamic_slice_sizes(), allow_scalar_index)); TF_RET_CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) << "return shape is set to: " << ShapeUtil::HumanString(result_shape) << " but is inferred to be: " @@ -1420,33 +1430,39 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { primitive_util::IsIntegralType(start_indices->shape().element_type())); const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); - const Literal& start_indices_literal = - parent_->GetEvaluatedLiteralFor(start_indices); switch (start_indices->shape().element_type()) { case S32: { TF_ASSIGN_OR_RETURN( parent_->evaluated_[dynamic_slice], - DynamicSlice(operand_literal, start_indices_literal, - result_shape)); + DynamicSlice( + operand_literal, + absl::MakeConstSpan(dynamic_slice->operands()).subspan(1), + result_shape)); } break; case S64: { TF_ASSIGN_OR_RETURN( parent_->evaluated_[dynamic_slice], - DynamicSlice(operand_literal, start_indices_literal, - result_shape)); + DynamicSlice( + operand_literal, + absl::MakeConstSpan(dynamic_slice->operands()).subspan(1), + result_shape)); } break; case U32: { TF_ASSIGN_OR_RETURN( parent_->evaluated_[dynamic_slice], - DynamicSlice(operand_literal, start_indices_literal, - result_shape)); + DynamicSlice( + operand_literal, + absl::MakeConstSpan(dynamic_slice->operands()).subspan(1), + result_shape)); } break; case U64: { TF_ASSIGN_OR_RETURN( parent_->evaluated_[dynamic_slice], - DynamicSlice(operand_literal, start_indices_literal, - result_shape)); + DynamicSlice( + operand_literal, + 
absl::MakeConstSpan(dynamic_slice->operands()).subspan(1), + result_shape)); } break; default: LOG(FATAL) << "HandleDynamicSlice: unhandled primitive type for " @@ -1463,12 +1479,20 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { auto update = dynamic_update_slice->operand(1); auto start_indices = dynamic_update_slice->operand(2); auto result_shape = dynamic_update_slice->shape(); + bool allow_scalar_index = false; + if (dynamic_update_slice->GetModule() != nullptr) { + allow_scalar_index = dynamic_update_slice->GetModule() + ->config() + .debug_options() + .xla_allow_scalar_index_dynamic_ops(); + } TF_ASSIGN_OR_RETURN( auto inferred_return_shape, ShapeInference::InferDynamicUpdateSliceShape( operand->shape(), update->shape(), Cast(dynamic_update_slice) - ->index_shapes())); + ->index_shapes(), + allow_scalar_index)); TF_RET_CHECK(ShapeUtil::Compatible(result_shape, inferred_return_shape)) << "return shape is set to: " << ShapeUtil::HumanString(result_shape) << " but is inferred to be: " @@ -1479,33 +1503,39 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); const Literal& update_literal = parent_->GetEvaluatedLiteralFor(update); - const Literal& start_indices_literal = - parent_->GetEvaluatedLiteralFor(start_indices); switch (start_indices->shape().element_type()) { case S32: { TF_ASSIGN_OR_RETURN( parent_->evaluated_[dynamic_update_slice], - DynamicUpdateSlice(operand_literal, update_literal, - start_indices_literal)); + DynamicUpdateSlice( + operand_literal, update_literal, + absl::MakeConstSpan(dynamic_update_slice->operands()) + .subspan(2))); } break; case S64: { TF_ASSIGN_OR_RETURN( parent_->evaluated_[dynamic_update_slice], - DynamicUpdateSlice(operand_literal, update_literal, - start_indices_literal)); + DynamicUpdateSlice( + operand_literal, update_literal, + absl::MakeConstSpan(dynamic_update_slice->operands()) + .subspan(2))); } break; 
case U32: { TF_ASSIGN_OR_RETURN( parent_->evaluated_[dynamic_update_slice], - DynamicUpdateSlice(operand_literal, update_literal, - start_indices_literal)); + DynamicUpdateSlice( + operand_literal, update_literal, + absl::MakeConstSpan(dynamic_update_slice->operands()) + .subspan(2))); } break; case U64: { TF_ASSIGN_OR_RETURN( parent_->evaluated_[dynamic_update_slice], - DynamicUpdateSlice(operand_literal, update_literal, - start_indices_literal)); + DynamicUpdateSlice( + operand_literal, update_literal, + absl::MakeConstSpan(dynamic_update_slice->operands()) + .subspan(2))); } break; default: LOG(FATAL) << "HandleDynamicUpdateSlice: unhandled primitive type for " @@ -2742,12 +2772,27 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { } template - StatusOr DynamicSlice(const Literal& operand_literal, - const Literal& start_indices_literal, - const Shape& result_shape) { - auto start_indices_typed = start_indices_literal.data(); - std::vector start(start_indices_typed.begin(), - start_indices_typed.end()); + StatusOr DynamicSlice( + const Literal& operand_literal, + absl::Span start_indices, + const Shape& result_shape) { + std::vector start; + // TODO(b/118437727): Remove the R1 code-path. Note that to distinguish + // between the cases, this currently assumes there is at least 1 index. That + // is wrong in the general case, because for scalar indices, if the operand + // is scalar, then there are no indices. This problem with resolve itself. + const HloInstruction* first_index = start_indices[0]; + if (first_index->shape().rank() == 1) { + auto start_indices_typed = + parent_->GetEvaluatedLiteralFor(first_index).data(); + start = std::vector(start_indices_typed.begin(), + start_indices_typed.end()); + } else { + for (HloInstruction* index : start_indices) { + start.push_back( + parent_->GetEvaluatedLiteralFor(index).GetFirstElement()); + } + } // Clamp the start indices so the slice is in-bounds w.r.t the operand. 
for (int64 i = 0; i < start.size(); ++i) { @@ -2773,14 +2818,28 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { } template - StatusOr DynamicUpdateSlice(const Literal& operand_literal, - const Literal& update_literal, - const Literal& start_indices_literal) { + StatusOr DynamicUpdateSlice( + const Literal& operand_literal, const Literal& update_literal, + absl::Span start_indices) { auto result = operand_literal.Clone(); - auto start_indices_typed = start_indices_literal.data(); const auto rank = result.shape().rank(); - std::vector start(start_indices_typed.begin(), - start_indices_typed.end()); + std::vector start; + // TODO(b/118437727): Remove the R1 code-path. Note that to distinguish + // between the cases, this currently assumes there is at least 1 index. That + // is wrong in the general case, because for scalar indices, if the operand + // is scalar, then there are no indices. This problem with resolve itself. + const HloInstruction* first_index = start_indices[0]; + if (first_index->shape().rank() == 1) { + auto start_indices_typed = + parent_->GetEvaluatedLiteralFor(first_index).data(); + start = std::vector(start_indices_typed.begin(), + start_indices_typed.end()); + } else { + for (HloInstruction* index : start_indices) { + start.push_back( + parent_->GetEvaluatedLiteralFor(index).GetFirstElement()); + } + } // Clamp the update start indices so the slice is in-bounds w.r.t the // operand. for (int64 i = 0; i < rank; ++i) { diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 83b6760683..e6111cfb57 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -1186,12 +1186,12 @@ class HloDynamicIndexInstruction : public HloInstruction { virtual int64 first_index_operand_number() const = 0; // Returns a subspan of operands which represent the start indices. 
- absl::Span index_operands() { + absl::Span index_operands() const { return absl::MakeSpan(operands()).subspan(first_index_operand_number()); } // Returns the shapes of the index operands. - std::vector index_shapes() { + std::vector index_shapes() const { std::vector shapes; auto indices = index_operands(); for (const HloInstruction* index : indices) { -- GitLab From d2ef9baf6e3551628b597ab626e84035af4dd3bb Mon Sep 17 00:00:00 2001 From: mdfaijul Date: Fri, 28 Dec 2018 14:18:53 -0800 Subject: [PATCH 0067/1095] changed ExpectTensorNear to ExpectClose --- tensorflow/core/kernels/conv_ops_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc index d144576249..09e300dd1c 100644 --- a/tensorflow/core/kernels/conv_ops_test.cc +++ b/tensorflow/core/kernels/conv_ops_test.cc @@ -775,7 +775,7 @@ class FusedConv2DOpTest : public OpsTestBase { // a full sum reduction, which causes larger numerical error // than usual cases. if (image_width == filter_size && image_height == filter_size) { - test::ExpectTensorNear(conv_2d, fused_conv_2d, 1e-4); + test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4); } else { test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6); } @@ -825,7 +825,7 @@ class FusedConv2DOpTest : public OpsTestBase { // a full sum reduction, which causes larger numerical error // than usual cases. if (image_width == filter_size && image_height == filter_size) { - test::ExpectTensorNear(conv_2d, fused_conv_2d, 1e-4); + test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4); } else { test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6); } -- GitLab From 24ac448941c15acc4e035bf16427258f61463b09 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Fri, 28 Dec 2018 14:19:50 -0800 Subject: [PATCH 0068/1095] Enabled batch norm in correctness test. 
PiperOrigin-RevId: 227169465 --- .../python/keras_correctness_test.py | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/distribute/python/keras_correctness_test.py b/tensorflow/contrib/distribute/python/keras_correctness_test.py index 6dcc9a1f8d..3abdee2c0e 100644 --- a/tensorflow/contrib/distribute/python/keras_correctness_test.py +++ b/tensorflow/contrib/distribute/python/keras_correctness_test.py @@ -63,6 +63,9 @@ def all_strategy_combinations_with_graph_mode(): def strategy_and_input_combinations(): + def cnn_model_with_batch_norm(**kwargs): + return _create_cnn_model(with_batch_norm=True, **kwargs) + return ( combinations.times( combinations.combine(distribution=all_strategies), @@ -72,6 +75,10 @@ def strategy_and_input_combinations(): combinations.combine(model_with_data=[ ModelWithData('dnn', _create_dnn_model, _dnn_training_data), ModelWithData('cnn', _create_cnn_model, _cnn_training_data), + ModelWithData('cnn_batch_norm', + cnn_model_with_batch_norm, + _cnn_training_data, + with_batch_norm=True), ]))) @@ -99,10 +106,11 @@ class ModelWithData(object): The model_fn must take two arguments: initial_weights and distribution. 
""" - def __init__(self, name, model_fn, data_fn): + def __init__(self, name, model_fn, data_fn, with_batch_norm=False): self.name = name self.model_fn = model_fn self.data_fn = data_fn + self.with_batch_norm = with_batch_norm def __repr__(self): return self.name @@ -158,16 +166,15 @@ def _cnn_training_data(count=_GLOBAL_BATCH_SIZE * _EVAL_STEPS, return x_train, y_train, x_predict -def _create_cnn_model(initial_weights=None, distribution=None): +def _create_cnn_model(initial_weights=None, distribution=None, + with_batch_norm=False): with MaybeDistributionScope(distribution): image = keras.layers.Input(shape=(28, 28, 3), name='image') c1 = keras.layers.Conv2D( name='conv1', filters=16, kernel_size=(3, 3), strides=(4, 4))( image) - # TODO(xiejw): Consider to enable the batch norm layer even it is not easy - # to test with TPU. - # - # c1 = keras.layers.BatchNormalization(name='bn1')(image) + if with_batch_norm: + c1 = keras.layers.BatchNormalization(name='bn1')(c1) c1 = keras.layers.MaxPooling2D(pool_size=(2, 2))(c1) logits = keras.layers.Dense( 10, activation='softmax', name='pred')( @@ -470,8 +477,17 @@ class TestDistributionStrategyCorrectness(test.TestCase, results_without_ds = fit_eval_and_predict( initial_weights, input_fn=input_fn, model_fn=model_fn, distribution=None) - compare_results(results_with_ds, results_without_ds, distribution, - testcase=self) + + # First, special case, for multi-replica distributed training, batch norm + # is not aggregated globally. So it is expected to have different weights. 
+ if (model_with_data.with_batch_norm and + distribution.num_replicas_in_sync > 1): + with self.assertRaises(AssertionError): + compare_results(results_with_ds, results_without_ds, distribution, + testcase=self) + else: + compare_results(results_with_ds, results_without_ds, distribution, + testcase=self) @combinations.generate(all_strategy_combinations_with_graph_mode()) def test_dynamic_lr(self, distribution): -- GitLab From 35f2d889bc136094c6f6b200f3b2913950ed9bbe Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 28 Dec 2018 14:57:13 -0800 Subject: [PATCH 0069/1095] Tidy up work for dynamic kernels: - rename all_kernels_statically_linked to all_kernels_impl - For now, exclude dataset ops kernels and list_kernels from dynamic loading kernels. PiperOrigin-RevId: 227172592 --- tensorflow/core/BUILD | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 640f881e5f..326f9587e8 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1371,7 +1371,7 @@ cc_library( # This includes implementations of all kernels built into TensorFlow. 
cc_library( - name = "all_kernels_statically_linked", + name = "all_kernels_impl", visibility = ["//visibility:private"], deps = [ "//tensorflow/core/kernels:array", @@ -1387,7 +1387,6 @@ cc_library( "//tensorflow/core/kernels:ctc_ops", "//tensorflow/core/kernels:cudnn_rnn_kernels", "//tensorflow/core/kernels:data_flow", - "//tensorflow/core/kernels:dataset_ops", "//tensorflow/core/kernels:decode_proto_op", "//tensorflow/core/kernels:encode_proto_op", "//tensorflow/core/kernels:fake_quant_ops", @@ -1398,7 +1397,6 @@ cc_library( "//tensorflow/core/kernels:image", "//tensorflow/core/kernels:io", "//tensorflow/core/kernels:linalg", - "//tensorflow/core/kernels:list_kernels", "//tensorflow/core/kernels:lookup", "//tensorflow/core/kernels:logging", "//tensorflow/core/kernels:manip", @@ -1461,8 +1459,13 @@ cc_library( visibility = ["//visibility:public"], deps = if_dynamic_kernels( [], - otherwise = [":all_kernels_statically_linked"], - ), + otherwise = [":all_kernels_impl"], + ) + [ + # TODO(gunan): Work on the API between these and rest of TF and make + # these also dynamically loading. + "//tensorflow/core/kernels:dataset_ops", # Depends on grappler + "//tensorflow/core/kernels:list_kernels", # Depends on variant_op_registry.h + ], ) tf_cuda_library( -- GitLab From 0a9a87fa1a47efbd97a2c6741643edd9ca81bb5a Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 28 Dec 2018 15:31:24 -0800 Subject: [PATCH 0070/1095] Change GetExecutablePath to return correct directory for python. 
PiperOrigin-RevId: 227175352 --- tensorflow/core/BUILD | 23 +++++++ tensorflow/core/platform/env.cc | 29 ++++++++- .../core/platform/fake_python_env_test.cc | 65 +++++++++++++++++++ 3 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 tensorflow/core/platform/fake_python_env_test.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 326f9587e8..4862c90aca 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3496,6 +3496,29 @@ tf_cc_test( ], ) +tf_cc_test( + name = "platform_fake_python_env_test", + size = "small", + srcs = ["platform/fake_python_env_test.cc"], + args = [ + "/some/path/to/pythontest.runfiles/org_tensorflow/stuff/to/run.py", + ], + tags = [ + "local", + "no_windows", + "nogpu", + "nomac", + "notap", + ], + deps = [ + ":lib", + ":lib_internal", + ":lib_test_internal", + ":test", + ":test_main", + ], +) + tf_cc_test( name = "platform_abi_test", size = "small", diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc index afc4201e53..6339417445 100644 --- a/tensorflow/core/platform/env.cc +++ b/tensorflow/core/platform/env.cc @@ -29,6 +29,9 @@ limitations under the License. #include "tensorflow/core/platform/windows/wide_char.h" #define PATH_MAX MAX_PATH #else +#include +#include +#include #include #endif @@ -314,7 +317,31 @@ string Env::GetExecutablePath() { string file_path = WideCharToUtf8(wc_file_path); std::copy(file_path.begin(), file_path.end(), exe_path); #else - CHECK_NE(-1, readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1)); + char buf[PATH_MAX] = {0}; + int path_length = readlink("/proc/self/exe", buf, sizeof(buf) - 1); + CHECK_NE(-1, path_length); + + if (strstr(buf, "python") != nullptr) { + // Discard the path of the python binary, and any flags. 
+ int fd = open("/proc/self/cmdline", O_RDONLY); + int cmd_length = read(fd, buf, PATH_MAX - 1); + CHECK_NE(-1, cmd_length); + int token_pos = 0; + for (bool token_is_first_or_flag = true; token_is_first_or_flag;) { + // Get token length, including null + int token_len = strlen(&buf[token_pos]) + 1; + token_is_first_or_flag = false; + // Check if we can skip without overshooting + if (token_pos + token_len < cmd_length) { + token_pos += token_len; + token_is_first_or_flag = (buf[token_pos] == '-'); // token is a flag + } + } + snprintf(exe_path, sizeof(exe_path), "%s", &buf[token_pos]); + } else { + snprintf(exe_path, sizeof(exe_path), "%s", buf); + } + #endif // Make sure it's null-terminated: exe_path[sizeof(exe_path) - 1] = 0; diff --git a/tensorflow/core/platform/fake_python_env_test.cc b/tensorflow/core/platform/fake_python_env_test.cc new file mode 100644 index 0000000000..b521db3c05 --- /dev/null +++ b/tensorflow/core/platform/fake_python_env_test.cc @@ -0,0 +1,65 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file has "python" in its name. Thus, it should trigger the python +// specific code paths. 
+ +#include +#include +#include +#include + +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/test.h" + +int myargc; +char** myargv; + +char kMagicBazelDirSubstring[] = ".runfiles/org_tensorflow"; +char kPythonFile[] = + "/some/path/to/pythontest.runfiles/org_tensorflow/stuff/to/run.py"; + +namespace tensorflow { + +TEST(FakePythonEnvTest, GetExecutablePath) { + // See if argc is greater than 1 and first arg is kPythonFile + // If not, rerun the executable with proper args. + if (myargc <= 1 || strstr(myargv[1], kMagicBazelDirSubstring) == nullptr) { + const char* filename = myargv[0]; + char* new_argv[] = { + myargv[0], + kPythonFile, + nullptr, + }; + + execv(filename, new_argv); + } + + Env* env = Env::Default(); + // We depend on the file/executable name to include python and fool the + // library to think this is running under the python interpreter. + string path = env->GetExecutablePath(); + EXPECT_TRUE(strstr(path.c_str(), kMagicBazelDirSubstring) != nullptr); +} + +} // namespace tensorflow + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + myargc = argc; + myargv = argv; + return RUN_ALL_TESTS(); +} -- GitLab From a4781da6b85f9b1e327dcdf8cd35b6a6385c600f Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 28 Dec 2018 16:14:19 -0800 Subject: [PATCH 0071/1095] Fix a bug in GetRunFilesDir in posix systems. Correctly handle what is returned by GetExecutablePath if the path returned is already a path under the runfiles directory. 
PiperOrigin-RevId: 227178949 --- tensorflow/core/platform/posix/env.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/platform/posix/env.cc b/tensorflow/core/platform/posix/env.cc index 0a939aef25..d87e5dcfe7 100644 --- a/tensorflow/core/platform/posix/env.cc +++ b/tensorflow/core/platform/posix/env.cc @@ -121,13 +121,25 @@ class PosixEnv : public Env { string GetRunfilesDir() override { string bin_path = this->GetExecutablePath(); - string runfiles_path = bin_path + ".runfiles/org_tensorflow"; + string runfiles_suffix = ".runfiles/org_tensorflow"; + std::size_t pos = bin_path.find(runfiles_suffix); + + // Sometimes (when executing under python) bin_path returns the full path to + // the python scripts under runfiles. Get the substring. + if (pos != std::string::npos) { + return bin_path.substr(0, pos + runfiles_suffix.length()); + } + + // See if we have the executable path. if executable.runfiles exists, return + // that folder. + string runfiles_path = bin_path + runfiles_suffix; Status s = this->IsDirectory(runfiles_path); if (s.ok()) { return runfiles_path; - } else { - return bin_path.substr(0, bin_path.find_last_of("/\\")); } + + // If nothing can be found, return something close. + return bin_path.substr(0, bin_path.find_last_of("/\\")); } private: -- GitLab From 719309ad0d593f2977c422575248018314e3160d Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Fri, 28 Dec 2018 16:19:33 -0800 Subject: [PATCH 0072/1095] Make user buffer alias pass to check with the existing module alias configuration, when populating aliases. 
PiperOrigin-RevId: 227179340 --- .../xla/service/hlo_input_output_alias_config.cc | 9 +++++++++ .../compiler/xla/service/hlo_input_output_alias_config.h | 7 +++++++ 2 files changed, 16 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc index 6e1597fd03..70d4df5d1c 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc @@ -17,9 +17,17 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_module.h" namespace xla { + +bool HloInputOutputAliasConfig::OutputHasAlias( + const ShapeIndex& output_index) const { + return aliased_output_indices_.count(output_index) > 0; +} + Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, int64 param_number, const ShapeIndex& param_index) { + TF_RET_CHECK(!OutputHasAlias(output_index)) + << "Output index " << output_index << " already has an alias setup"; TF_RET_CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index)) << absl::StrCat("Tring to set up alias at ", output_index.ToString(), " which is an invalid index for shape ", @@ -33,6 +41,7 @@ Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, alias_.element(output_index)->second.ToString()); (*alias_.mutable_element(output_index)) = std::make_pair(param_number, param_index); + aliased_output_indices_.insert(output_index); VLOG(4) << "Set up alias between output index " << output_index.ToString() << " and parameter " << param_index << " at index " << param_index.ToString(); diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h index 439676b154..3967743d53 100644 --- a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h @@ -18,6 +18,7 @@ 
limitations under the License. #include +#include "absl/container/flat_hash_set.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/shape_tree.h" @@ -47,6 +48,9 @@ class HloInputOutputAliasConfig { bool ParameterHasAlias(int64 param_number, const ShapeIndex& param_index) const; + // Checks whether the provided output index has already been aliased. + bool OutputHasAlias(const ShapeIndex& output_index) const; + // (De)Serializes an HloInputOutoutAliasConfig to/from an // HloInputOutoutAliasProto. HloInputOutputAliasProto ToProto() const; @@ -93,6 +97,9 @@ class HloInputOutputAliasConfig { // is a pair of parameter number and index into the buffer. If the value is // nullopt, it means there is no parameter aliasing for this output. ShapeTree>> alias_; + + // The indices of the output which have been aliased. + absl::flat_hash_set aliased_output_indices_; }; std::ostream& operator<<(std::ostream& out, -- GitLab From c59db932b0334c9c1e2e385e6435ee31ea3d7087 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 28 Dec 2018 16:32:01 -0800 Subject: [PATCH 0073/1095] Upgrade default TF cuda version to cuda 10. PiperOrigin-RevId: 227180147 --- configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.py b/configure.py index c588381d40..bb59063f79 100644 --- a/configure.py +++ b/configure.py @@ -33,7 +33,7 @@ except ImportError: from distutils.spawn import find_executable as which # pylint: enable=g-import-not-at-top -_DEFAULT_CUDA_VERSION = '9.0' +_DEFAULT_CUDA_VERSION = '10.0' _DEFAULT_CUDNN_VERSION = '7' _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,7.0' _DEFAULT_CUDA_PATH = '/usr/local/cuda' -- GitLab From 7966a07f756730650b29736958de53e99d284f68 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 28 Dec 2018 17:05:41 -0800 Subject: [PATCH 0074/1095] Remove dependency from cloud kernels to cloud op libraries. 
PiperOrigin-RevId: 227182645 --- tensorflow/contrib/cloud/kernels/BUILD | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/contrib/cloud/kernels/BUILD b/tensorflow/contrib/cloud/kernels/BUILD index 1311063ec0..20f8c2b245 100644 --- a/tensorflow/contrib/cloud/kernels/BUILD +++ b/tensorflow/contrib/cloud/kernels/BUILD @@ -27,7 +27,6 @@ tf_kernel_library( deps = [ ":bigquery_table_accessor", ":bigquery_table_partition_proto_cc", - "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:reader_base", @@ -79,7 +78,6 @@ tf_kernel_library( srcs = ["gcs_config_ops.cc"], visibility = ["//tensorflow:internal"], deps = [ - "//tensorflow/contrib/cloud:gcs_config_ops_op_lib", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core/platform/cloud:curl_http_request", -- GitLab From 447b5edf2acb49a0b4fbaa4d37b099f0304894a1 Mon Sep 17 00:00:00 2001 From: Anna R Date: Fri, 28 Dec 2018 17:11:07 -0800 Subject: [PATCH 0075/1095] Add windows_and_api_version_2 option to select statements in build_config.bzl to allow matching both on windows and api_version_2. PiperOrigin-RevId: 227183002 --- tensorflow/BUILD | 9 +++++++++ tensorflow/core/platform/default/build_config.bzl | 6 ++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index f07e7365d3..0b4498ef12 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -370,6 +370,15 @@ config_setting( define_values = {"tf_api_version": "2"}, ) +# This flag is defined for select statements that match both +# on 'windows' and 'api_version_2'. In this case, bazel requires +# having a flag which is a superset of these two. 
+config_setting( + name = "windows_and_api_version_2", + define_values = {"tf_api_version": "2"}, + values = {"cpu": "x64_windows"}, +) + package_group( name = "internal", packages = [ diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 58fd0e2712..e26828c75e 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -663,8 +663,9 @@ def tf_additional_cloud_op_deps(): "//tensorflow:ios": [], "//tensorflow:linux_s390x": [], "//tensorflow:windows": [], - "//tensorflow:no_gcp_support": [], "//tensorflow:api_version_2": [], + "//tensorflow:windows_and_api_version_2": [], + "//tensorflow:no_gcp_support": [], "//conditions:default": [ "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib", "//tensorflow/contrib/cloud:gcs_config_ops_op_lib", @@ -678,8 +679,9 @@ def tf_additional_cloud_kernel_deps(): "//tensorflow:ios": [], "//tensorflow:linux_s390x": [], "//tensorflow:windows": [], - "//tensorflow:no_gcp_support": [], "//tensorflow:api_version_2": [], + "//tensorflow:windows_and_api_version_2": [], + "//tensorflow:no_gcp_support": [], "//conditions:default": [ "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops", "//tensorflow/contrib/cloud/kernels:gcs_config_ops", -- GitLab From 817504f1e487fe91b8f0442ad945b5397188f981 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Fri, 28 Dec 2018 18:05:12 -0800 Subject: [PATCH 0076/1095] Allow ReplaceOperandWith and ReplaceAllUsesWith to accept different shapes if specified. 
PiperOrigin-RevId: 227186196 --- .../compiler/xla/service/hlo_instruction.cc | 25 +++++++++++++++---- .../compiler/xla/service/hlo_instruction.h | 15 +++++++++-- .../compiler/xla/service/hlo_instructions.cc | 2 +- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 543fb66c01..66bc73740f 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1885,6 +1885,16 @@ Status HloInstruction::ReplaceUseWith(HloInstruction* user, Status HloInstruction::ReplaceOperandWith(int64 operand_num, HloInstruction* new_operand) { + auto old_operand = operand(operand_num); + TF_RET_CHECK(ShapeUtil::CompatibleIgnoringFpPrecision(old_operand->shape(), + new_operand->shape())) + << old_operand->shape() << " is not compatible with " + << new_operand->shape(); + return ReplaceOperandWithDifferentShape(operand_num, new_operand); +} + +Status HloInstruction::ReplaceOperandWithDifferentShape( + int64 operand_num, HloInstruction* new_operand) { TF_RET_CHECK(operand_num >= 0); TF_RET_CHECK(operand_num < operand_count()); HloInstruction* old_operand = mutable_operand(operand_num); @@ -1892,10 +1902,6 @@ Status HloInstruction::ReplaceOperandWith(int64 operand_num, return Status::OK(); } - TF_RET_CHECK(ShapeUtil::CompatibleIgnoringFpPrecision(old_operand->shape(), - new_operand->shape())) - << old_operand->shape() << " is not compatible with " - << new_operand->shape(); operands_[operand_num] = new_operand; VLOG(3) << "Replacing operand " << operand_num << " of " << name() << " with " @@ -1910,6 +1916,14 @@ Status HloInstruction::ReplaceOperandWith(int64 operand_num, } Status HloInstruction::ReplaceAllUsesWith(HloInstruction* new_producer) { + TF_RET_CHECK( + ShapeUtil::CompatibleIgnoringFpPrecision(shape(), new_producer->shape())) + << shape() << " is not compatible with " << new_producer->shape(); + return 
ReplaceAllUsesWithDifferentShape(new_producer); +} + +Status HloInstruction::ReplaceAllUsesWithDifferentShape( + HloInstruction* new_producer) { bool new_producer_is_user = false; for (HloInstruction* user : users()) { if (user == new_producer) { @@ -1934,7 +1948,8 @@ Status HloInstruction::ReplaceAllUsesWith(HloInstruction* new_producer) { AddUser(new_producer); } if (parent_ && parent_->root_instruction() == this) { - parent_->set_root_instruction(new_producer); + parent_->set_root_instruction(new_producer, + /*accept_different_shape=*/true); } return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 362d07e64f..2827eed0df 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -937,11 +937,16 @@ class HloInstruction { // operands of it which could be created due to this replacement. Status ReplaceUseWith(HloInstruction* user, HloInstruction* new_producer); - // Replaces the specified operand with new_operand. + // Replaces the specified operand with new_operand. The old and new operands + // must have compatible shapes ignoring floating-point precision. // // This function does NOT remove duplicated operands even if this instruction // is a fusion, so that the existing operand numbers do not change. - Status ReplaceOperandWith(int64 operand_no, HloInstruction* new_operand); + Status ReplaceOperandWith(int64 operand_num, HloInstruction* new_operand); + + // Same as ReplaceOperandWith(), but new_operand can have a different shape. + Status ReplaceOperandWithDifferentShape(int64 operand_num, + HloInstruction* new_operand); // Replaces all uses of this instruction with the new producer. If // new_producer is a user of this instruction then new_producer remains a use @@ -950,10 +955,16 @@ class HloInstruction { // If this instruction is the root of its computation, sets the computation's // root to new_producer. 
// + // The new producer must have a compatible shape ignoring floating-point + // precision. + // // If a user is a fusion instruction, this function will remove any duplicated // operands of it which could be created due to this replacement. Status ReplaceAllUsesWith(HloInstruction* new_producer); + // Same as ReplaceAllUsesWith, but new_producer can have a different shape. + Status ReplaceAllUsesWithDifferentShape(HloInstruction* new_producer); + // Performs a postorder DFS visit using this node as the root. If // call_finish_visit is true, then DfsHloVisitor::FinishVisit is called when // complete. If ignore_control_predecessors is true, instructions only diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 7a3fb6d9a2..7170dd7d81 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -1324,7 +1324,7 @@ HloInstruction* HloFusionInstruction::CloneAndFuseInternal( if (newly_created_tuple_instr) { HloInstruction* new_instr = parent()->AddInstruction( HloInstruction::CreateGetTupleElement(fused_root->shape(), this, 0)); - TF_CHECK_OK(ReplaceAllUsesWith(new_instr)); + TF_CHECK_OK(ReplaceAllUsesWithDifferentShape(new_instr)); } int64 index = tuple_elements.size(); if (instruction_to_fuse->opcode() == HloOpcode::kTuple) { -- GitLab From 3aaebcbcada0bb861ebfee0eea6068de0c7f37be Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 28 Dec 2018 18:31:41 -0800 Subject: [PATCH 0077/1095] Move default TF builds to cuda 10. 
PiperOrigin-RevId: 227187422 --- third_party/toolchains/gpus/cuda/BUILD | 42 +++++++++---------- .../toolchains/gpus/cuda/cuda/cuda_config.h | 4 +- .../bin/crosstool_wrapper_driver_is_not_gcc | 4 +- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/third_party/toolchains/gpus/cuda/BUILD b/third_party/toolchains/gpus/cuda/BUILD index f63a0ea819..8bb22c0269 100644 --- a/third_party/toolchains/gpus/cuda/BUILD +++ b/third_party/toolchains/gpus/cuda/BUILD @@ -85,8 +85,8 @@ cc_library( cc_library( name = "cudart", - srcs = ["cuda/lib/libcudart.so.9.0"], - data = ["cuda/lib/libcudart.so.9.0"], + srcs = ["cuda/lib/libcudart.so.10.0"], + data = ["cuda/lib/libcudart.so.10.0"], includes = [ ".", "cuda/include", @@ -97,8 +97,8 @@ cc_library( cc_library( name = "cublas", - srcs = ["cuda/lib/libcublas.so.9.0"], - data = ["cuda/lib/libcublas.so.9.0"], + srcs = ["cuda/lib/libcublas.so.10.0"], + data = ["cuda/lib/libcublas.so.10.0"], includes = [ ".", "cuda/include", @@ -109,8 +109,8 @@ cc_library( cc_library( name = "cusolver", - srcs = ["cuda/lib/libcusolver.so.9.0"], - data = ["cuda/lib/libcusolver.so.9.0"], + srcs = ["cuda/lib/libcusolver.so.10.0"], + data = ["cuda/lib/libcusolver.so.10.0"], includes = [ ".", "cuda/include", @@ -143,8 +143,8 @@ cc_library( cc_library( name = "cufft", - srcs = ["cuda/lib/libcufft.so.9.0"], - data = ["cuda/lib/libcufft.so.9.0"], + srcs = ["cuda/lib/libcufft.so.10.0"], + data = ["cuda/lib/libcufft.so.10.0"], includes = [ ".", "cuda/include", @@ -155,8 +155,8 @@ cc_library( cc_library( name = "curand", - srcs = ["cuda/lib/libcurand.so.9.0"], - data = ["cuda/lib/libcurand.so.9.0"], + srcs = ["cuda/lib/libcurand.so.10.0"], + data = ["cuda/lib/libcurand.so.10.0"], includes = [ ".", "cuda/include", @@ -193,7 +193,7 @@ cc_library( cc_library( name = "cupti_dsos", - data = ["cuda/lib/libcupti.so.9.0"], + data = ["cuda/lib/libcupti.so.10.0"], includes = [ ".", "cuda/include", @@ -1193,7 +1193,7 @@ genrule( "cuda/include/vector_types.h", ], 
cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/include/CL/cl.h" "$(@D)/cuda/include/CL/cl.h" && cp "/usr/local/cuda-9.0/include/CL/cl.hpp" "$(@D)/cuda/include/CL/cl.hpp" && cp "/usr/local/cuda-9.0/include/CL/cl_egl.h" "$(@D)/cuda/include/CL/cl_egl.h" && cp "/usr/local/cuda-9.0/include/CL/cl_ext.h" "$(@D)/cuda/include/CL/cl_ext.h" && cp "/usr/local/cuda-9.0/include/CL/cl_gl.h" "$(@D)/cuda/include/CL/cl_gl.h" && cp "/usr/local/cuda-9.0/include/CL/cl_gl_ext.h" "$(@D)/cuda/include/CL/cl_gl_ext.h" && cp "/usr/local/cuda-9.0/include/CL/cl_platform.h" "$(@D)/cuda/include/CL/cl_platform.h" && cp "/usr/local/cuda-9.0/include/CL/opencl.h" "$(@D)/cuda/include/CL/opencl.h" && cp "/usr/local/cuda-9.0/include/builtin_types.h" "$(@D)/cuda/include/builtin_types.h" && cp "/usr/local/cuda-9.0/include/channel_descriptor.h" "$(@D)/cuda/include/channel_descriptor.h" && cp "/usr/local/cuda-9.0/include/common_functions.h" "$(@D)/cuda/include/common_functions.h" && cp "/usr/local/cuda-9.0/include/cooperative_groups.h" "$(@D)/cuda/include/cooperative_groups.h" && cp "/usr/local/cuda-9.0/include/cooperative_groups_helpers.h" "$(@D)/cuda/include/cooperative_groups_helpers.h" && cp "/usr/local/cuda-9.0/include/crt/common_functions.h" "$(@D)/cuda/include/crt/common_functions.h" && cp "/usr/local/cuda-9.0/include/crt/device_double_functions.h" "$(@D)/cuda/include/crt/device_double_functions.h" && cp "/usr/local/cuda-9.0/include/crt/device_double_functions.hpp" "$(@D)/cuda/include/crt/device_double_functions.hpp" && cp "/usr/local/cuda-9.0/include/crt/device_functions.h" "$(@D)/cuda/include/crt/device_functions.h" && cp "/usr/local/cuda-9.0/include/crt/device_functions.hpp" "$(@D)/cuda/include/crt/device_functions.hpp" && cp "/usr/local/cuda-9.0/include/crt/func_macro.h" 
"$(@D)/cuda/include/crt/func_macro.h" && cp "/usr/local/cuda-9.0/include/crt/host_config.h" "$(@D)/cuda/include/crt/host_config.h" && cp "/usr/local/cuda-9.0/include/crt/host_defines.h" "$(@D)/cuda/include/crt/host_defines.h" && cp "/usr/local/cuda-9.0/include/crt/host_runtime.h" "$(@D)/cuda/include/crt/host_runtime.h" && cp "/usr/local/cuda-9.0/include/crt/math_functions.h" "$(@D)/cuda/include/crt/math_functions.h" && cp "/usr/local/cuda-9.0/include/crt/math_functions.hpp" "$(@D)/cuda/include/crt/math_functions.hpp" && cp "/usr/local/cuda-9.0/include/crt/mma.h" "$(@D)/cuda/include/crt/mma.h" && cp "/usr/local/cuda-9.0/include/crt/mma.hpp" "$(@D)/cuda/include/crt/mma.hpp" && cp "/usr/local/cuda-9.0/include/crt/nvfunctional" "$(@D)/cuda/include/crt/nvfunctional" && cp "/usr/local/cuda-9.0/include/crt/sm_70_rt.h" "$(@D)/cuda/include/crt/sm_70_rt.h" && cp "/usr/local/cuda-9.0/include/crt/sm_70_rt.hpp" "$(@D)/cuda/include/crt/sm_70_rt.hpp" && cp "/usr/local/cuda-9.0/include/crt/storage_class.h" "$(@D)/cuda/include/crt/storage_class.h" && cp "/usr/local/cuda-9.0/include/cuComplex.h" "$(@D)/cuda/include/cuComplex.h" && cp "/usr/local/cuda-9.0/include/cublas.h" "$(@D)/cuda/include/cublas.h" && cp "/usr/local/cuda-9.0/include/cublasXt.h" "$(@D)/cuda/include/cublasXt.h" && cp "/usr/local/cuda-9.0/include/cublas_api.h" "$(@D)/cuda/include/cublas_api.h" && cp "/usr/local/cuda-9.0/include/cublas_v2.h" "$(@D)/cuda/include/cublas_v2.h" && cp "/usr/local/cuda-9.0/include/cuda.h" "$(@D)/cuda/include/cuda.h" && cp "/usr/local/cuda-9.0/include/cudaEGL.h" "$(@D)/cuda/include/cudaEGL.h" && cp "/usr/local/cuda-9.0/include/cudaGL.h" "$(@D)/cuda/include/cudaGL.h" && cp "/usr/local/cuda-9.0/include/cudaProfiler.h" "$(@D)/cuda/include/cudaProfiler.h" && cp "/usr/local/cuda-9.0/include/cudaVDPAU.h" "$(@D)/cuda/include/cudaVDPAU.h" && cp "/usr/local/cuda-9.0/include/cuda_device_runtime_api.h" "$(@D)/cuda/include/cuda_device_runtime_api.h" && cp "/usr/local/cuda-9.0/include/cuda_fp16.h" 
"$(@D)/cuda/include/cuda_fp16.h" && cp "/usr/local/cuda-9.0/include/cuda_fp16.hpp" "$(@D)/cuda/include/cuda_fp16.hpp" && cp "/usr/local/cuda-9.0/include/cuda_gl_interop.h" "$(@D)/cuda/include/cuda_gl_interop.h" && cp "/usr/local/cuda-9.0/include/cuda_occupancy.h" "$(@D)/cuda/include/cuda_occupancy.h" && cp "/usr/local/cuda-9.0/include/cuda_profiler_api.h" "$(@D)/cuda/include/cuda_profiler_api.h" && cp "/usr/local/cuda-9.0/include/cuda_runtime.h" "$(@D)/cuda/include/cuda_runtime.h" && cp "/usr/local/cuda-9.0/include/cuda_runtime_api.h" "$(@D)/cuda/include/cuda_runtime_api.h" && cp "/usr/local/cuda-9.0/include/cuda_surface_types.h" "$(@D)/cuda/include/cuda_surface_types.h" && cp "/usr/local/cuda-9.0/include/cuda_texture_types.h" "$(@D)/cuda/include/cuda_texture_types.h" && cp "/usr/local/cuda-9.0/include/cuda_vdpau_interop.h" "$(@D)/cuda/include/cuda_vdpau_interop.h" && cp "/usr/local/cuda-9.0/include/cudalibxt.h" "$(@D)/cuda/include/cudalibxt.h" && cp "/usr/local/cuda-9.0/include/cudnn.h" "$(@D)/cuda/include/cudnn.h" && cp "/usr/local/cuda-9.0/include/cufft.h" "$(@D)/cuda/include/cufft.h" && cp "/usr/local/cuda-9.0/include/cufftXt.h" "$(@D)/cuda/include/cufftXt.h" && cp "/usr/local/cuda-9.0/include/cufftw.h" "$(@D)/cuda/include/cufftw.h" && cp "/usr/local/cuda-9.0/include/curand.h" "$(@D)/cuda/include/curand.h" && cp "/usr/local/cuda-9.0/include/curand_discrete.h" "$(@D)/cuda/include/curand_discrete.h" && cp "/usr/local/cuda-9.0/include/curand_discrete2.h" "$(@D)/cuda/include/curand_discrete2.h" && cp "/usr/local/cuda-9.0/include/curand_globals.h" "$(@D)/cuda/include/curand_globals.h" && cp "/usr/local/cuda-9.0/include/curand_kernel.h" "$(@D)/cuda/include/curand_kernel.h" && cp "/usr/local/cuda-9.0/include/curand_lognormal.h" "$(@D)/cuda/include/curand_lognormal.h" && cp "/usr/local/cuda-9.0/include/curand_mrg32k3a.h" "$(@D)/cuda/include/curand_mrg32k3a.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32.h" "$(@D)/cuda/include/curand_mtgp32.h" && cp 
"/usr/local/cuda-9.0/include/curand_mtgp32_host.h" "$(@D)/cuda/include/curand_mtgp32_host.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32_kernel.h" "$(@D)/cuda/include/curand_mtgp32_kernel.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32dc_p_11213.h" "$(@D)/cuda/include/curand_mtgp32dc_p_11213.h" && cp "/usr/local/cuda-9.0/include/curand_normal.h" "$(@D)/cuda/include/curand_normal.h" && cp "/usr/local/cuda-9.0/include/curand_normal_static.h" "$(@D)/cuda/include/curand_normal_static.h" && cp "/usr/local/cuda-9.0/include/curand_philox4x32_x.h" "$(@D)/cuda/include/curand_philox4x32_x.h" && cp "/usr/local/cuda-9.0/include/curand_poisson.h" "$(@D)/cuda/include/curand_poisson.h" && cp "/usr/local/cuda-9.0/include/curand_precalc.h" "$(@D)/cuda/include/curand_precalc.h" && cp "/usr/local/cuda-9.0/include/curand_uniform.h" "$(@D)/cuda/include/curand_uniform.h" && cp "/usr/local/cuda-9.0/include/cusolverDn.h" "$(@D)/cuda/include/cusolverDn.h" && cp "/usr/local/cuda-9.0/include/cusolverRf.h" "$(@D)/cuda/include/cusolverRf.h" && cp "/usr/local/cuda-9.0/include/cusolverSp.h" "$(@D)/cuda/include/cusolverSp.h" && cp "/usr/local/cuda-9.0/include/cusolverSp_LOWLEVEL_PREVIEW.h" "$(@D)/cuda/include/cusolverSp_LOWLEVEL_PREVIEW.h" && cp "/usr/local/cuda-9.0/include/cusolver_common.h" "$(@D)/cuda/include/cusolver_common.h" && cp "/usr/local/cuda-9.0/include/cusparse.h" "$(@D)/cuda/include/cusparse.h" && cp "/usr/local/cuda-9.0/include/cusparse_v2.h" "$(@D)/cuda/include/cusparse_v2.h" && cp "/usr/local/cuda-9.0/include/device_atomic_functions.h" "$(@D)/cuda/include/device_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/device_atomic_functions.hpp" "$(@D)/cuda/include/device_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/device_double_functions.h" "$(@D)/cuda/include/device_double_functions.h" && cp "/usr/local/cuda-9.0/include/device_double_functions.hpp" "$(@D)/cuda/include/device_double_functions.hpp" && cp "/usr/local/cuda-9.0/include/device_functions.h" 
"$(@D)/cuda/include/device_functions.h" && cp "/usr/local/cuda-9.0/include/device_functions.hpp" "$(@D)/cuda/include/device_functions.hpp" && cp "/usr/local/cuda-9.0/include/device_functions_decls.h" "$(@D)/cuda/include/device_functions_decls.h" && cp "/usr/local/cuda-9.0/include/device_launch_parameters.h" "$(@D)/cuda/include/device_launch_parameters.h" && cp "/usr/local/cuda-9.0/include/device_types.h" "$(@D)/cuda/include/device_types.h" && cp "/usr/local/cuda-9.0/include/driver_functions.h" "$(@D)/cuda/include/driver_functions.h" && cp "/usr/local/cuda-9.0/include/driver_types.h" "$(@D)/cuda/include/driver_types.h" && cp "/usr/local/cuda-9.0/include/dynlink_cuda.h" "$(@D)/cuda/include/dynlink_cuda.h" && cp "/usr/local/cuda-9.0/include/dynlink_cuda_cuda.h" "$(@D)/cuda/include/dynlink_cuda_cuda.h" && cp "/usr/local/cuda-9.0/include/dynlink_cuviddec.h" "$(@D)/cuda/include/dynlink_cuviddec.h" && cp "/usr/local/cuda-9.0/include/dynlink_nvcuvid.h" "$(@D)/cuda/include/dynlink_nvcuvid.h" && cp "/usr/local/cuda-9.0/include/fatBinaryCtl.h" "$(@D)/cuda/include/fatBinaryCtl.h" && cp "/usr/local/cuda-9.0/include/fatbinary.h" "$(@D)/cuda/include/fatbinary.h" && cp "/usr/local/cuda-9.0/include/host_config.h" "$(@D)/cuda/include/host_config.h" && cp "/usr/local/cuda-9.0/include/host_defines.h" "$(@D)/cuda/include/host_defines.h" && cp "/usr/local/cuda-9.0/include/library_types.h" "$(@D)/cuda/include/library_types.h" && cp "/usr/local/cuda-9.0/include/math_constants.h" "$(@D)/cuda/include/math_constants.h" && cp "/usr/local/cuda-9.0/include/math_functions.h" "$(@D)/cuda/include/math_functions.h" && cp "/usr/local/cuda-9.0/include/math_functions.hpp" "$(@D)/cuda/include/math_functions.hpp" && cp "/usr/local/cuda-9.0/include/math_functions_dbl_ptx3.h" "$(@D)/cuda/include/math_functions_dbl_ptx3.h" && cp "/usr/local/cuda-9.0/include/math_functions_dbl_ptx3.hpp" "$(@D)/cuda/include/math_functions_dbl_ptx3.hpp" && cp "/usr/local/cuda-9.0/include/mma.h" "$(@D)/cuda/include/mma.h" && 
cp "/usr/local/cuda-9.0/include/npp.h" "$(@D)/cuda/include/npp.h" && cp "/usr/local/cuda-9.0/include/nppcore.h" "$(@D)/cuda/include/nppcore.h" && cp "/usr/local/cuda-9.0/include/nppdefs.h" "$(@D)/cuda/include/nppdefs.h" && cp "/usr/local/cuda-9.0/include/nppi.h" "$(@D)/cuda/include/nppi.h" && cp "/usr/local/cuda-9.0/include/nppi_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/nppi_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-9.0/include/nppi_color_conversion.h" "$(@D)/cuda/include/nppi_color_conversion.h" && cp "/usr/local/cuda-9.0/include/nppi_compression_functions.h" "$(@D)/cuda/include/nppi_compression_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_computer_vision.h" "$(@D)/cuda/include/nppi_computer_vision.h" && cp "/usr/local/cuda-9.0/include/nppi_data_exchange_and_initialization.h" "$(@D)/cuda/include/nppi_data_exchange_and_initialization.h" && cp "/usr/local/cuda-9.0/include/nppi_filtering_functions.h" "$(@D)/cuda/include/nppi_filtering_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_geometry_transforms.h" "$(@D)/cuda/include/nppi_geometry_transforms.h" && cp "/usr/local/cuda-9.0/include/nppi_linear_transforms.h" "$(@D)/cuda/include/nppi_linear_transforms.h" && cp "/usr/local/cuda-9.0/include/nppi_morphological_operations.h" "$(@D)/cuda/include/nppi_morphological_operations.h" && cp "/usr/local/cuda-9.0/include/nppi_statistics_functions.h" "$(@D)/cuda/include/nppi_statistics_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_support_functions.h" "$(@D)/cuda/include/nppi_support_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_threshold_and_compare_operations.h" "$(@D)/cuda/include/nppi_threshold_and_compare_operations.h" && cp "/usr/local/cuda-9.0/include/npps.h" "$(@D)/cuda/include/npps.h" && cp "/usr/local/cuda-9.0/include/npps_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/npps_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-9.0/include/npps_conversion_functions.h" 
"$(@D)/cuda/include/npps_conversion_functions.h" && cp "/usr/local/cuda-9.0/include/npps_filtering_functions.h" "$(@D)/cuda/include/npps_filtering_functions.h" && cp "/usr/local/cuda-9.0/include/npps_initialization.h" "$(@D)/cuda/include/npps_initialization.h" && cp "/usr/local/cuda-9.0/include/npps_statistics_functions.h" "$(@D)/cuda/include/npps_statistics_functions.h" && cp "/usr/local/cuda-9.0/include/npps_support_functions.h" "$(@D)/cuda/include/npps_support_functions.h" && cp "/usr/local/cuda-9.0/include/nppversion.h" "$(@D)/cuda/include/nppversion.h" && cp "/usr/local/cuda-9.0/include/nvToolsExt.h" "$(@D)/cuda/include/nvToolsExt.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtCuda.h" "$(@D)/cuda/include/nvToolsExtCuda.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtCudaRt.h" "$(@D)/cuda/include/nvToolsExtCudaRt.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtMeta.h" "$(@D)/cuda/include/nvToolsExtMeta.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtSync.h" "$(@D)/cuda/include/nvToolsExtSync.h" && cp "/usr/local/cuda-9.0/include/nvblas.h" "$(@D)/cuda/include/nvblas.h" && cp "/usr/local/cuda-9.0/include/nvfunctional" "$(@D)/cuda/include/nvfunctional" && cp "/usr/local/cuda-9.0/include/nvgraph.h" "$(@D)/cuda/include/nvgraph.h" && cp "/usr/local/cuda-9.0/include/nvml.h" "$(@D)/cuda/include/nvml.h" && cp "/usr/local/cuda-9.0/include/nvrtc.h" "$(@D)/cuda/include/nvrtc.h" && cp "/usr/local/cuda-9.0/include/sm_20_atomic_functions.h" "$(@D)/cuda/include/sm_20_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_20_atomic_functions.hpp" "$(@D)/cuda/include/sm_20_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/sm_20_intrinsics.h" "$(@D)/cuda/include/sm_20_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_20_intrinsics.hpp" "$(@D)/cuda/include/sm_20_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sm_30_intrinsics.h" "$(@D)/cuda/include/sm_30_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_30_intrinsics.hpp" "$(@D)/cuda/include/sm_30_intrinsics.hpp" 
&& cp "/usr/local/cuda-9.0/include/sm_32_atomic_functions.h" "$(@D)/cuda/include/sm_32_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_32_atomic_functions.hpp" "$(@D)/cuda/include/sm_32_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/sm_32_intrinsics.h" "$(@D)/cuda/include/sm_32_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_32_intrinsics.hpp" "$(@D)/cuda/include/sm_32_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sm_35_atomic_functions.h" "$(@D)/cuda/include/sm_35_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_35_intrinsics.h" "$(@D)/cuda/include/sm_35_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_60_atomic_functions.h" "$(@D)/cuda/include/sm_60_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_60_atomic_functions.hpp" "$(@D)/cuda/include/sm_60_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/sm_61_intrinsics.h" "$(@D)/cuda/include/sm_61_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_61_intrinsics.hpp" "$(@D)/cuda/include/sm_61_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sobol_direction_vectors.h" "$(@D)/cuda/include/sobol_direction_vectors.h" && cp "/usr/local/cuda-9.0/include/surface_functions.h" "$(@D)/cuda/include/surface_functions.h" && cp "/usr/local/cuda-9.0/include/surface_functions.hpp" "$(@D)/cuda/include/surface_functions.hpp" && cp "/usr/local/cuda-9.0/include/surface_indirect_functions.h" "$(@D)/cuda/include/surface_indirect_functions.h" && cp "/usr/local/cuda-9.0/include/surface_indirect_functions.hpp" "$(@D)/cuda/include/surface_indirect_functions.hpp" && cp "/usr/local/cuda-9.0/include/surface_types.h" "$(@D)/cuda/include/surface_types.h" && cp "/usr/local/cuda-9.0/include/texture_fetch_functions.h" "$(@D)/cuda/include/texture_fetch_functions.h" && cp "/usr/local/cuda-9.0/include/texture_fetch_functions.hpp" "$(@D)/cuda/include/texture_fetch_functions.hpp" && cp "/usr/local/cuda-9.0/include/texture_indirect_functions.h" "$(@D)/cuda/include/texture_indirect_functions.h" && cp 
"/usr/local/cuda-9.0/include/texture_indirect_functions.hpp" "$(@D)/cuda/include/texture_indirect_functions.hpp" && cp "/usr/local/cuda-9.0/include/texture_types.h" "$(@D)/cuda/include/texture_types.h" && cp "/usr/local/cuda-9.0/include/thrust/adjacent_difference.h" "$(@D)/cuda/include/thrust/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/advance.h" "$(@D)/cuda/include/thrust/advance.h" && cp "/usr/local/cuda-9.0/include/thrust/binary_search.h" "$(@D)/cuda/include/thrust/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/complex.h" "$(@D)/cuda/include/thrust/complex.h" && cp "/usr/local/cuda-9.0/include/thrust/copy.h" "$(@D)/cuda/include/thrust/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/count.h" "$(@D)/cuda/include/thrust/count.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/adjacent_difference.inl" "$(@D)/cuda/include/thrust/detail/adjacent_difference.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/advance.inl" "$(@D)/cuda/include/thrust/detail/advance.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/allocator_traits.h" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/allocator_traits.inl" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/copy_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/copy_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/default_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/default_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/allocator/destroy_range.h" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/destroy_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/fill_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/fill_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/malloc_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/malloc_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/no_throw_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/no_throw_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/tagged_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/tagged_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/temporary_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/temporary_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/binary_search.inl" "$(@D)/cuda/include/thrust/detail/binary_search.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/arithmetic.h" "$(@D)/cuda/include/thrust/detail/complex/arithmetic.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/c99math.h" "$(@D)/cuda/include/thrust/detail/complex/c99math.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/complex/catrig.h" "$(@D)/cuda/include/thrust/detail/complex/catrig.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/catrigf.h" "$(@D)/cuda/include/thrust/detail/complex/catrigf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ccosh.h" "$(@D)/cuda/include/thrust/detail/complex/ccosh.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ccoshf.h" "$(@D)/cuda/include/thrust/detail/complex/ccoshf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cexp.h" "$(@D)/cuda/include/thrust/detail/complex/cexp.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cexpf.h" "$(@D)/cuda/include/thrust/detail/complex/cexpf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/clog.h" "$(@D)/cuda/include/thrust/detail/complex/clog.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/clogf.h" "$(@D)/cuda/include/thrust/detail/complex/clogf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/complex.inl" "$(@D)/cuda/include/thrust/detail/complex/complex.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cpow.h" "$(@D)/cuda/include/thrust/detail/complex/cpow.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cpowf.h" "$(@D)/cuda/include/thrust/detail/complex/cpowf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cproj.h" "$(@D)/cuda/include/thrust/detail/complex/cproj.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csinh.h" "$(@D)/cuda/include/thrust/detail/complex/csinh.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csinhf.h" "$(@D)/cuda/include/thrust/detail/complex/csinhf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csqrt.h" "$(@D)/cuda/include/thrust/detail/complex/csqrt.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csqrtf.h" "$(@D)/cuda/include/thrust/detail/complex/csqrtf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ctanh.h" "$(@D)/cuda/include/thrust/detail/complex/ctanh.h" && 
cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ctanhf.h" "$(@D)/cuda/include/thrust/detail/complex/ctanhf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/math_private.h" "$(@D)/cuda/include/thrust/detail/complex/math_private.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/stream.h" "$(@D)/cuda/include/thrust/detail/complex/stream.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config.h" "$(@D)/cuda/include/thrust/detail/config.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/compiler.h" "$(@D)/cuda/include/thrust/detail/config/compiler.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/compiler_fence.h" "$(@D)/cuda/include/thrust/detail/config/compiler_fence.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/config.h" "$(@D)/cuda/include/thrust/detail/config/config.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/debug.h" "$(@D)/cuda/include/thrust/detail/config/debug.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/device_system.h" "$(@D)/cuda/include/thrust/detail/config/device_system.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/exec_check_disable.h" "$(@D)/cuda/include/thrust/detail/config/exec_check_disable.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/forceinline.h" "$(@D)/cuda/include/thrust/detail/config/forceinline.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/global_workarounds.h" "$(@D)/cuda/include/thrust/detail/config/global_workarounds.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/host_device.h" "$(@D)/cuda/include/thrust/detail/config/host_device.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/host_system.h" "$(@D)/cuda/include/thrust/detail/config/host_system.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/simple_defines.h" "$(@D)/cuda/include/thrust/detail/config/simple_defines.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/contiguous_storage.h" 
"$(@D)/cuda/include/thrust/detail/contiguous_storage.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/contiguous_storage.inl" "$(@D)/cuda/include/thrust/detail/contiguous_storage.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/copy.h" "$(@D)/cuda/include/thrust/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/copy.inl" "$(@D)/cuda/include/thrust/detail/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/copy_if.h" "$(@D)/cuda/include/thrust/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/copy_if.inl" "$(@D)/cuda/include/thrust/detail/copy_if.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/count.inl" "$(@D)/cuda/include/thrust/detail/count.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/cstdint.h" "$(@D)/cuda/include/thrust/detail/cstdint.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_delete.inl" "$(@D)/cuda/include/thrust/detail/device_delete.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_free.inl" "$(@D)/cuda/include/thrust/detail/device_free.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_malloc.inl" "$(@D)/cuda/include/thrust/detail/device_malloc.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_new.inl" "$(@D)/cuda/include/thrust/detail/device_new.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_ptr.inl" "$(@D)/cuda/include/thrust/detail/device_ptr.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_reference.inl" "$(@D)/cuda/include/thrust/detail/device_reference.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_vector.inl" "$(@D)/cuda/include/thrust/detail/device_vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/dispatch/is_trivial_copy.h" "$(@D)/cuda/include/thrust/detail/dispatch/is_trivial_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/distance.inl" "$(@D)/cuda/include/thrust/detail/distance.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/equal.inl" 
"$(@D)/cuda/include/thrust/detail/equal.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/execute_with_allocator.h" "$(@D)/cuda/include/thrust/detail/execute_with_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/execution_policy.h" "$(@D)/cuda/include/thrust/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/extrema.inl" "$(@D)/cuda/include/thrust/detail/extrema.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/fill.inl" "$(@D)/cuda/include/thrust/detail/fill.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/find.inl" "$(@D)/cuda/include/thrust/detail/find.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/for_each.inl" "$(@D)/cuda/include/thrust/detail/for_each.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/function.h" "$(@D)/cuda/include/thrust/detail/function.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional.inl" "$(@D)/cuda/include/thrust/detail/functional.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/actor.h" "$(@D)/cuda/include/thrust/detail/functional/actor.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/actor.inl" "$(@D)/cuda/include/thrust/detail/functional/actor.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/argument.h" "$(@D)/cuda/include/thrust/detail/functional/argument.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/composite.h" "$(@D)/cuda/include/thrust/detail/functional/composite.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/arithmetic_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/arithmetic_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/assignment_operator.h" "$(@D)/cuda/include/thrust/detail/functional/operators/assignment_operator.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/functional/operators/bitwise_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/bitwise_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/compound_assignment_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/compound_assignment_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/logical_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/logical_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/operator_adaptors.h" "$(@D)/cuda/include/thrust/detail/functional/operators/operator_adaptors.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/relational_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/relational_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/placeholder.h" "$(@D)/cuda/include/thrust/detail/functional/placeholder.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/value.h" "$(@D)/cuda/include/thrust/detail/functional/value.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/gather.inl" "$(@D)/cuda/include/thrust/detail/gather.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/generate.inl" "$(@D)/cuda/include/thrust/detail/generate.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/get_iterator_value.h" "$(@D)/cuda/include/thrust/detail/get_iterator_value.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/host_vector.inl" "$(@D)/cuda/include/thrust/detail/host_vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/inner_product.inl" "$(@D)/cuda/include/thrust/detail/inner_product.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/integer_math.h" "$(@D)/cuda/include/thrust/detail/integer_math.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/integer_traits.h" "$(@D)/cuda/include/thrust/detail/integer_traits.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/internal_functional.h" "$(@D)/cuda/include/thrust/detail/internal_functional.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/logical.inl" "$(@D)/cuda/include/thrust/detail/logical.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/merge.inl" "$(@D)/cuda/include/thrust/detail/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/minmax.h" "$(@D)/cuda/include/thrust/detail/minmax.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/mismatch.inl" "$(@D)/cuda/include/thrust/detail/mismatch.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/mpl/math.h" "$(@D)/cuda/include/thrust/detail/mpl/math.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/numeric_traits.h" "$(@D)/cuda/include/thrust/detail/numeric_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/overlapped_copy.h" "$(@D)/cuda/include/thrust/detail/overlapped_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/pair.inl" "$(@D)/cuda/include/thrust/detail/pair.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/partition.inl" "$(@D)/cuda/include/thrust/detail/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/pointer.h" "$(@D)/cuda/include/thrust/detail/pointer.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/pointer.inl" "$(@D)/cuda/include/thrust/detail/pointer.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/range/head_flags.h" "$(@D)/cuda/include/thrust/detail/range/head_flags.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/range/tail_flags.h" "$(@D)/cuda/include/thrust/detail/range/tail_flags.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/raw_pointer_cast.h" "$(@D)/cuda/include/thrust/detail/raw_pointer_cast.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/raw_reference_cast.h" "$(@D)/cuda/include/thrust/detail/raw_reference_cast.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/reduce.inl" "$(@D)/cuda/include/thrust/detail/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/reference.h" "$(@D)/cuda/include/thrust/detail/reference.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/reference.inl" "$(@D)/cuda/include/thrust/detail/reference.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/reference_forward_declaration.h" "$(@D)/cuda/include/thrust/detail/reference_forward_declaration.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/remove.inl" "$(@D)/cuda/include/thrust/detail/remove.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/replace.inl" "$(@D)/cuda/include/thrust/detail/replace.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/reverse.inl" "$(@D)/cuda/include/thrust/detail/reverse.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/scan.inl" "$(@D)/cuda/include/thrust/detail/scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/scatter.inl" "$(@D)/cuda/include/thrust/detail/scatter.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/seq.h" "$(@D)/cuda/include/thrust/detail/seq.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/sequence.inl" "$(@D)/cuda/include/thrust/detail/sequence.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/set_operations.inl" "$(@D)/cuda/include/thrust/detail/set_operations.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/sort.inl" "$(@D)/cuda/include/thrust/detail/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/static_assert.h" "$(@D)/cuda/include/thrust/detail/static_assert.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/static_map.h" "$(@D)/cuda/include/thrust/detail/static_map.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/swap.h" "$(@D)/cuda/include/thrust/detail/swap.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/swap.inl" "$(@D)/cuda/include/thrust/detail/swap.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/swap_ranges.inl" "$(@D)/cuda/include/thrust/detail/swap_ranges.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/tabulate.inl" "$(@D)/cuda/include/thrust/detail/tabulate.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/temporary_array.h" "$(@D)/cuda/include/thrust/detail/temporary_array.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/temporary_array.inl" "$(@D)/cuda/include/thrust/detail/temporary_array.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/transform.inl" "$(@D)/cuda/include/thrust/detail/transform.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/transform_reduce.inl" "$(@D)/cuda/include/thrust/detail/transform_reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/transform_scan.inl" "$(@D)/cuda/include/thrust/detail/transform_scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/trivial_sequence.h" "$(@D)/cuda/include/thrust/detail/trivial_sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/tuple.inl" "$(@D)/cuda/include/thrust/detail/tuple.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/tuple_meta_transform.h" "$(@D)/cuda/include/thrust/detail/tuple_meta_transform.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/tuple_transform.h" "$(@D)/cuda/include/thrust/detail/tuple_transform.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" "$(@D)/cuda/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/function_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/function_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/has_member_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_member_function.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/type_traits/has_nested_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_nested_type.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/has_trivial_assign.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_trivial_assign.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/is_call_possible.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_call_possible.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/is_metafunction_defined.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_metafunction_defined.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/iterator/is_output_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_output_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/minimum_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/minimum_type.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/pointer_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/pointer_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/result_of_adaptable_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/result_of_adaptable_function.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_fill.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/unique.inl" "$(@D)/cuda/include/thrust/detail/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/use_default.h" "$(@D)/cuda/include/thrust/detail/use_default.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/util/align.h" "$(@D)/cuda/include/thrust/detail/util/align.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/util/blocking.h" "$(@D)/cuda/include/thrust/detail/util/blocking.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/vector_base.h" "$(@D)/cuda/include/thrust/detail/vector_base.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/vector_base.inl" "$(@D)/cuda/include/thrust/detail/vector_base.inl" && cp "/usr/local/cuda-9.0/include/thrust/device_allocator.h" "$(@D)/cuda/include/thrust/device_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/device_delete.h" "$(@D)/cuda/include/thrust/device_delete.h" && cp "/usr/local/cuda-9.0/include/thrust/device_free.h" "$(@D)/cuda/include/thrust/device_free.h" && cp "/usr/local/cuda-9.0/include/thrust/device_malloc.h" "$(@D)/cuda/include/thrust/device_malloc.h" && cp "/usr/local/cuda-9.0/include/thrust/device_malloc_allocator.h" "$(@D)/cuda/include/thrust/device_malloc_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/device_new.h" "$(@D)/cuda/include/thrust/device_new.h" && cp "/usr/local/cuda-9.0/include/thrust/device_new_allocator.h" "$(@D)/cuda/include/thrust/device_new_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/device_ptr.h" "$(@D)/cuda/include/thrust/device_ptr.h" && cp "/usr/local/cuda-9.0/include/thrust/device_reference.h" "$(@D)/cuda/include/thrust/device_reference.h" && cp "/usr/local/cuda-9.0/include/thrust/device_vector.h" "$(@D)/cuda/include/thrust/device_vector.h" && cp "/usr/local/cuda-9.0/include/thrust/distance.h" "$(@D)/cuda/include/thrust/distance.h" && cp "/usr/local/cuda-9.0/include/thrust/equal.h" "$(@D)/cuda/include/thrust/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/execution_policy.h" "$(@D)/cuda/include/thrust/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/extrema.h" "$(@D)/cuda/include/thrust/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/fill.h" "$(@D)/cuda/include/thrust/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/find.h" "$(@D)/cuda/include/thrust/find.h" && cp 
"/usr/local/cuda-9.0/include/thrust/for_each.h" "$(@D)/cuda/include/thrust/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/functional.h" "$(@D)/cuda/include/thrust/functional.h" && cp "/usr/local/cuda-9.0/include/thrust/gather.h" "$(@D)/cuda/include/thrust/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/generate.h" "$(@D)/cuda/include/thrust/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/host_vector.h" "$(@D)/cuda/include/thrust/host_vector.h" && cp "/usr/local/cuda-9.0/include/thrust/inner_product.h" "$(@D)/cuda/include/thrust/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/constant_iterator.h" "$(@D)/cuda/include/thrust/iterator/constant_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/counting_iterator.h" "$(@D)/cuda/include/thrust/iterator/counting_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/any_assign.h" "$(@D)/cuda/include/thrust/iterator/detail/any_assign.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/any_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/any_system_tag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/constant_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/constant_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/counting_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/counting_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/device_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/device_system_tag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/discard_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/discard_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/distance_from_result.h" "$(@D)/cuda/include/thrust/iterator/detail/distance_from_result.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/host_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/host_system_tag.h" && cp 
"/usr/local/cuda-9.0/include/thrust/iterator/detail/is_iterator_category.h" "$(@D)/cuda/include/thrust/iterator/detail/is_iterator_category.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/is_trivial_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/is_trivial_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_adaptor_base.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_adaptor_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_category_to_system.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_system.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_category_to_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_traversal.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_facade_category.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_facade_category.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_traits.inl" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traits.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_traversal_tags.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traversal_tags.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/join_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/join_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/minimum_category.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_category.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/minimum_system.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_system.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/normal_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/normal_iterator.h" && cp 
"/usr/local/cuda-9.0/include/thrust/iterator/detail/permutation_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/permutation_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/retag.h" "$(@D)/cuda/include/thrust/iterator/detail/retag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/reverse_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/reverse_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/tagged_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/tagged_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/transform_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/transform_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/transform_output_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/transform_output_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/tuple_of_iterator_references.h" "$(@D)/cuda/include/thrust/iterator/detail/tuple_of_iterator_references.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/universal_categories.h" "$(@D)/cuda/include/thrust/iterator/detail/universal_categories.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/zip_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/zip_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/discard_iterator.h" "$(@D)/cuda/include/thrust/iterator/discard_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_adaptor.h" "$(@D)/cuda/include/thrust/iterator/iterator_adaptor.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_categories.h" "$(@D)/cuda/include/thrust/iterator/iterator_categories.h" 
&& cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_facade.h" "$(@D)/cuda/include/thrust/iterator/iterator_facade.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_traits.h" "$(@D)/cuda/include/thrust/iterator/iterator_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/permutation_iterator.h" "$(@D)/cuda/include/thrust/iterator/permutation_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/retag.h" "$(@D)/cuda/include/thrust/iterator/retag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/reverse_iterator.h" "$(@D)/cuda/include/thrust/iterator/reverse_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/transform_iterator.h" "$(@D)/cuda/include/thrust/iterator/transform_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/transform_output_iterator.h" "$(@D)/cuda/include/thrust/iterator/transform_output_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/zip_iterator.h" "$(@D)/cuda/include/thrust/iterator/zip_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/logical.h" "$(@D)/cuda/include/thrust/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/memory.h" "$(@D)/cuda/include/thrust/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/merge.h" "$(@D)/cuda/include/thrust/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/mismatch.h" "$(@D)/cuda/include/thrust/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/pair.h" "$(@D)/cuda/include/thrust/pair.h" && cp "/usr/local/cuda-9.0/include/thrust/partition.h" "$(@D)/cuda/include/thrust/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/random.h" "$(@D)/cuda/include/thrust/random.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/discard_block_engine.inl" "$(@D)/cuda/include/thrust/random/detail/discard_block_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_congruential_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/random/detail/linear_congruential_engine_discard.h" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine_discard.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_feedback_shift_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/mod.h" "$(@D)/cuda/include/thrust/random/detail/mod.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/normal_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/normal_distribution.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/normal_distribution_base.h" "$(@D)/cuda/include/thrust/random/detail/normal_distribution_base.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/random_core_access.h" "$(@D)/cuda/include/thrust/random/detail/random_core_access.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/subtract_with_carry_engine.inl" "$(@D)/cuda/include/thrust/random/detail/subtract_with_carry_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/uniform_int_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_int_distribution.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/uniform_real_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_real_distribution.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/xor_combine_engine.inl" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/xor_combine_engine_max.h" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine_max.h" && cp "/usr/local/cuda-9.0/include/thrust/random/discard_block_engine.h" "$(@D)/cuda/include/thrust/random/discard_block_engine.h" && cp 
"/usr/local/cuda-9.0/include/thrust/random/linear_congruential_engine.h" "$(@D)/cuda/include/thrust/random/linear_congruential_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/random/linear_feedback_shift_engine.h" "$(@D)/cuda/include/thrust/random/linear_feedback_shift_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/random/normal_distribution.h" "$(@D)/cuda/include/thrust/random/normal_distribution.h" && cp "/usr/local/cuda-9.0/include/thrust/random/subtract_with_carry_engine.h" "$(@D)/cuda/include/thrust/random/subtract_with_carry_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/random/uniform_int_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_int_distribution.h" && cp "/usr/local/cuda-9.0/include/thrust/random/uniform_real_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_real_distribution.h" && cp "/usr/local/cuda-9.0/include/thrust/random/xor_combine_engine.h" "$(@D)/cuda/include/thrust/random/xor_combine_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/reduce.h" "$(@D)/cuda/include/thrust/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/remove.h" "$(@D)/cuda/include/thrust/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/replace.h" "$(@D)/cuda/include/thrust/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/reverse.h" "$(@D)/cuda/include/thrust/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/scan.h" "$(@D)/cuda/include/thrust/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/scatter.h" "$(@D)/cuda/include/thrust/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/sequence.h" "$(@D)/cuda/include/thrust/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/set_operations.h" "$(@D)/cuda/include/thrust/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/sort.h" "$(@D)/cuda/include/thrust/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/swap.h" "$(@D)/cuda/include/thrust/swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/adjacent_difference.h" 
"$(@D)/cuda/include/thrust/system/cpp/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cpp/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/count.h" "$(@D)/cuda/include/thrust/system/cpp/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/equal.h" "$(@D)/cuda/include/thrust/system/cpp/detail/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cpp/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/find.h" "$(@D)/cuda/include/thrust/system/cpp/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cpp/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/gather.h" "$(@D)/cuda/include/thrust/system/cpp/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/generate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/inner_product.h" 
"$(@D)/cuda/include/thrust/system/cpp/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cpp/detail/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/logical.h" "$(@D)/cuda/include/thrust/system/cpp/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cpp/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/merge.h" "$(@D)/cuda/include/thrust/system/cpp/detail/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cpp/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/par.h" "$(@D)/cuda/include/thrust/system/cpp/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/partition.h" "$(@D)/cuda/include/thrust/system/cpp/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/remove.h" "$(@D)/cuda/include/thrust/system/cpp/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/replace.h" "$(@D)/cuda/include/thrust/system/cpp/detail/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan_by_key.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cpp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/cpp/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/sort.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/cpp/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cpp/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/transform.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/unique.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/vector.inl" 
"$(@D)/cuda/include/thrust/system/cpp/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/memory.h" "$(@D)/cuda/include/thrust/system/cpp/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/vector.h" "$(@D)/cuda/include/thrust/system/cpp/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/config.h" "$(@D)/cuda/include/thrust/system/cuda/config.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/cuda/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cuda/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/agent_launcher.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/agent_launcher.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/alignment.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/alignment.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/triple_chevron_launch.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/triple_chevron_launch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/util.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/util.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/count.h" "$(@D)/cuda/include/thrust/system/cuda/detail/count.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cross_system.h" "$(@D)/cuda/include/thrust/system/cuda/detail/cross_system.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_histogram.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_upsweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_upsweep.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_rle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_rle.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_segment_fixup.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_segment_fixup.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_select_if.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_select_if.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_csrt.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_csrt.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_orig.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_orig.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_row_based.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_row_based.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/single_pass_scan_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/single_pass_scan_operators.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_adjacent_difference.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_adjacent_difference.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_load.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_shuffle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_shuffle.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_store.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans2.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans2.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans3.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans3.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/cub.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/cub.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_segmented_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_segmented_reduce.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_select.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_select.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_spmv.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_spmv.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_histogram.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce_by_key.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce_by_key.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_rle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_rle.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_select_if.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_select_if.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_csrt.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_csrt.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_orig.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_orig.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_row_based.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_row_based.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/host/mutex.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/host/mutex.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/discard_output_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/discard_output_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_search.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_search.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_allocator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_allocator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_arch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_arch.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_debug.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_debug.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_device.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_device.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_macro.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_macro.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_namespace.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_namespace.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_ptx.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_ptx.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_type.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_type.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/equal.h" "$(@D)/cuda/include/thrust/system/cuda/detail/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/error.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/error.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cuda/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/fill.h" "$(@D)/cuda/include/thrust/system/cuda/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/find.h" "$(@D)/cuda/include/thrust/system/cuda/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cuda/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/gather.h" "$(@D)/cuda/include/thrust/system/cuda/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/generate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/guarded_driver_types.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_driver_types.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cuda/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/internal/copy_cross_system.h" "$(@D)/cuda/include/thrust/system/cuda/detail/internal/copy_cross_system.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/internal/copy_device_to_device.h" "$(@D)/cuda/include/thrust/system/cuda/detail/internal/copy_device_to_device.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cuda/detail/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/logical.h" "$(@D)/cuda/include/thrust/system/cuda/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cuda/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/memory_buffer.h" "$(@D)/cuda/include/thrust/system/cuda/detail/memory_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/merge.h" "$(@D)/cuda/include/thrust/system/cuda/detail/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cuda/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/par.h" "$(@D)/cuda/include/thrust/system/cuda/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/par_to_seq.h" "$(@D)/cuda/include/thrust/system/cuda/detail/par_to_seq.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/parallel_for.h" "$(@D)/cuda/include/thrust/system/cuda/detail/parallel_for.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/partition.h" "$(@D)/cuda/include/thrust/system/cuda/detail/partition.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/remove.h" "$(@D)/cuda/include/thrust/system/cuda/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/replace.h" "$(@D)/cuda/include/thrust/system/cuda/detail/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/cuda/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/cuda/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cuda/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/terminate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/terminate.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/transform.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/unique.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/util.h" "$(@D)/cuda/include/thrust/system/cuda/detail/util.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/error.h" "$(@D)/cuda/include/thrust/system/cuda/error.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/experimental/pinned_allocator.h" "$(@D)/cuda/include/thrust/system/cuda/experimental/pinned_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/memory.h" "$(@D)/cuda/include/thrust/system/cuda/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/vector.h" "$(@D)/cuda/include/thrust/system/cuda/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/adjacent_difference.h" 
"$(@D)/cuda/include/thrust/system/detail/adl/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/adl/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/count.h" "$(@D)/cuda/include/thrust/system/detail/adl/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/equal.h" "$(@D)/cuda/include/thrust/system/detail/adl/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/extrema.h" "$(@D)/cuda/include/thrust/system/detail/adl/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/find.h" "$(@D)/cuda/include/thrust/system/detail/adl/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/for_each.h" "$(@D)/cuda/include/thrust/system/detail/adl/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/gather.h" "$(@D)/cuda/include/thrust/system/detail/adl/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/generate.h" "$(@D)/cuda/include/thrust/system/detail/adl/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/get_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/adl/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/adl/iter_swap.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/adl/logical.h" "$(@D)/cuda/include/thrust/system/detail/adl/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/adl/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/merge.h" "$(@D)/cuda/include/thrust/system/detail/adl/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/adl/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/partition.h" "$(@D)/cuda/include/thrust/system/detail/adl/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/remove.h" "$(@D)/cuda/include/thrust/system/detail/adl/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/replace.h" "$(@D)/cuda/include/thrust/system/detail/adl/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/reverse.h" "$(@D)/cuda/include/thrust/system/detail/adl/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/scatter.h" "$(@D)/cuda/include/thrust/system/detail/adl/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/sequence.h" "$(@D)/cuda/include/thrust/system/detail/adl/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/adl/set_operations.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/adl/sort.h" "$(@D)/cuda/include/thrust/system/detail/adl/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/adl/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/adl/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/adl/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/transform.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/unique.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/bad_alloc.h" "$(@D)/cuda/include/thrust/system/detail/bad_alloc.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/errno.h" "$(@D)/cuda/include/thrust/system/detail/errno.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/error_category.inl" "$(@D)/cuda/include/thrust/system/detail/error_category.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/error_code.inl" 
"$(@D)/cuda/include/thrust/system/detail/error_code.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/error_condition.inl" "$(@D)/cuda/include/thrust/system/detail/error_condition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/adjacent_difference.inl" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/advance.h" "$(@D)/cuda/include/thrust/system/detail/generic/advance.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/advance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/advance.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy_if.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/count.h" "$(@D)/cuda/include/thrust/system/detail/generic/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/count.inl" "$(@D)/cuda/include/thrust/system/detail/generic/count.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/distance.h" 
"$(@D)/cuda/include/thrust/system/detail/generic/distance.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/distance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/distance.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/equal.h" "$(@D)/cuda/include/thrust/system/detail/generic/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/equal.inl" "$(@D)/cuda/include/thrust/system/detail/generic/equal.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/extrema.h" "$(@D)/cuda/include/thrust/system/detail/generic/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/extrema.inl" "$(@D)/cuda/include/thrust/system/detail/generic/extrema.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/find.h" "$(@D)/cuda/include/thrust/system/detail/generic/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/find.inl" "$(@D)/cuda/include/thrust/system/detail/generic/find.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/for_each.h" "$(@D)/cuda/include/thrust/system/detail/generic/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/gather.h" "$(@D)/cuda/include/thrust/system/detail/generic/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/gather.inl" "$(@D)/cuda/include/thrust/system/detail/generic/gather.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/generate.h" "$(@D)/cuda/include/thrust/system/detail/generic/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/generate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/generate.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/inner_product.inl" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/logical.h" "$(@D)/cuda/include/thrust/system/detail/generic/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/memory.h" "$(@D)/cuda/include/thrust/system/detail/generic/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/memory.inl" "$(@D)/cuda/include/thrust/system/detail/generic/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/merge.h" "$(@D)/cuda/include/thrust/system/detail/generic/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/merge.inl" "$(@D)/cuda/include/thrust/system/detail/generic/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/mismatch.inl" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/partition.h" "$(@D)/cuda/include/thrust/system/detail/generic/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/partition.inl" "$(@D)/cuda/include/thrust/system/detail/generic/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/remove.h" "$(@D)/cuda/include/thrust/system/detail/generic/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/remove.inl" "$(@D)/cuda/include/thrust/system/detail/generic/remove.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/replace.h" "$(@D)/cuda/include/thrust/system/detail/generic/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/replace.inl" "$(@D)/cuda/include/thrust/system/detail/generic/replace.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reverse.h" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reverse.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scalar/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scalar/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scatter.h" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scatter.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/select_system.h" "$(@D)/cuda/include/thrust/system/detail/generic/select_system.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sequence.h" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sequence.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/generic/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/set_operations.inl" "$(@D)/cuda/include/thrust/system/detail/generic/set_operations.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sort.h" "$(@D)/cuda/include/thrust/system/detail/generic/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sort.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/swap_ranges.inl" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/tabulate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/tag.h" "$(@D)/cuda/include/thrust/system/detail/generic/tag.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/temporary_buffer.inl" "$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/type_traits.h" "$(@D)/cuda/include/thrust/system/detail/generic/type_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/internal/decompose.h" "$(@D)/cuda/include/thrust/system/detail/internal/decompose.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/sequential/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/sequential/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy_backward.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_backward.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/count.h" "$(@D)/cuda/include/thrust/system/detail/sequential/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/equal.h" "$(@D)/cuda/include/thrust/system/detail/sequential/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/execution_policy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/extrema.h" 
"$(@D)/cuda/include/thrust/system/detail/sequential/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/find.h" "$(@D)/cuda/include/thrust/system/detail/sequential/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/for_each.h" "$(@D)/cuda/include/thrust/system/detail/sequential/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/gather.h" "$(@D)/cuda/include/thrust/system/detail/sequential/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/general_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/general_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/generate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/get_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/sequential/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/insertion_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/insertion_sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/sequential/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/logical.h" "$(@D)/cuda/include/thrust/system/detail/sequential/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/sequential/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/merge.h" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/sequential/merge.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/sequential/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/partition.h" "$(@D)/cuda/include/thrust/system/detail/sequential/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/remove.h" "$(@D)/cuda/include/thrust/system/detail/sequential/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/replace.h" "$(@D)/cuda/include/thrust/system/detail/sequential/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/reverse.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/scatter.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/sequence.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/sequential/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/sequential/sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_merge_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_merge_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_primitive_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_primitive_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_radix_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_radix_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/sequential/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/sequential/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/transform.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/transform_scan.h" 
"$(@D)/cuda/include/thrust/system/detail/sequential/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/trivial_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/trivial_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/unique.h" "$(@D)/cuda/include/thrust/system/detail/sequential/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/system_error.inl" "$(@D)/cuda/include/thrust/system/detail/system_error.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/error_code.h" "$(@D)/cuda/include/thrust/system/error_code.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/omp/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/omp/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy_if.inl" 
"$(@D)/cuda/include/thrust/system/omp/detail/copy_if.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/count.h" "$(@D)/cuda/include/thrust/system/omp/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/default_decomposition.h" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/default_decomposition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/equal.h" "$(@D)/cuda/include/thrust/system/omp/detail/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/omp/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/find.h" "$(@D)/cuda/include/thrust/system/omp/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/gather.h" "$(@D)/cuda/include/thrust/system/omp/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/generate.h" "$(@D)/cuda/include/thrust/system/omp/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/omp/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/iter_swap.h" 
"$(@D)/cuda/include/thrust/system/omp/detail/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/logical.h" "$(@D)/cuda/include/thrust/system/omp/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/omp/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/omp/detail/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/merge.h" "$(@D)/cuda/include/thrust/system/omp/detail/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/omp/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/par.h" "$(@D)/cuda/include/thrust/system/omp/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/partition.h" "$(@D)/cuda/include/thrust/system/omp/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/partition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_intervals.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/remove.h" 
"$(@D)/cuda/include/thrust/system/omp/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/remove.inl" "$(@D)/cuda/include/thrust/system/omp/detail/remove.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/replace.h" "$(@D)/cuda/include/thrust/system/omp/detail/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/omp/detail/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/omp/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/omp/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/omp/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/sort.h" "$(@D)/cuda/include/thrust/system/omp/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/sort.inl" "$(@D)/cuda/include/thrust/system/omp/detail/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/omp/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/omp/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/omp/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/transform.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/transform_reduce.h" 
"$(@D)/cuda/include/thrust/system/omp/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique.h" "$(@D)/cuda/include/thrust/system/omp/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/omp/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/memory.h" "$(@D)/cuda/include/thrust/system/omp/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/vector.h" "$(@D)/cuda/include/thrust/system/omp/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system/system_error.h" "$(@D)/cuda/include/thrust/system/system_error.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/tbb/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/assign_value.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/tbb/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/tbb/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/count.h" "$(@D)/cuda/include/thrust/system/tbb/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/equal.h" "$(@D)/cuda/include/thrust/system/tbb/detail/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/extrema.h" "$(@D)/cuda/include/thrust/system/tbb/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/find.h" "$(@D)/cuda/include/thrust/system/tbb/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/for_each.h" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/gather.h" "$(@D)/cuda/include/thrust/system/tbb/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/generate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/get_value.h" 
"$(@D)/cuda/include/thrust/system/tbb/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/tbb/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/tbb/detail/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/logical.h" "$(@D)/cuda/include/thrust/system/tbb/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/tbb/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/memory.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/merge.h" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/merge.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/tbb/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/par.h" "$(@D)/cuda/include/thrust/system/tbb/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/partition.h" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/partition.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce_by_key.inl" 
"$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_intervals.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/remove.h" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/remove.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/replace.h" "$(@D)/cuda/include/thrust/system/tbb/detail/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reverse.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scan.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scatter.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/sequence.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/tbb/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/sort.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/sort.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/tbb/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/tabulate.h" 
"$(@D)/cuda/include/thrust/system/tbb/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/tbb/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/transform.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/vector.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/memory.h" "$(@D)/cuda/include/thrust/system/tbb/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/vector.h" "$(@D)/cuda/include/thrust/system/tbb/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system_error.h" 
"$(@D)/cuda/include/thrust/system_error.h" && cp "/usr/local/cuda-9.0/include/thrust/tabulate.h" "$(@D)/cuda/include/thrust/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/transform.h" "$(@D)/cuda/include/thrust/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/transform_reduce.h" "$(@D)/cuda/include/thrust/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/transform_scan.h" "$(@D)/cuda/include/thrust/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/tuple.h" "$(@D)/cuda/include/thrust/tuple.h" && cp "/usr/local/cuda-9.0/include/thrust/uninitialized_copy.h" "$(@D)/cuda/include/thrust/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/uninitialized_fill.h" "$(@D)/cuda/include/thrust/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/unique.h" "$(@D)/cuda/include/thrust/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/version.h" "$(@D)/cuda/include/thrust/version.h" && cp "/usr/local/cuda-9.0/include/vector_functions.h" "$(@D)/cuda/include/vector_functions.h" && cp "/usr/local/cuda-9.0/include/vector_functions.hpp" "$(@D)/cuda/include/vector_functions.hpp" && cp "/usr/local/cuda-9.0/include/vector_types.h" "$(@D)/cuda/include/vector_types.h" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-10.0/include/CL/cl.h" "$(@D)/cuda/include/CL/cl.h" && cp "/usr/local/cuda-10.0/include/CL/cl.hpp" "$(@D)/cuda/include/CL/cl.hpp" && cp "/usr/local/cuda-10.0/include/CL/cl_egl.h" "$(@D)/cuda/include/CL/cl_egl.h" && cp "/usr/local/cuda-10.0/include/CL/cl_ext.h" "$(@D)/cuda/include/CL/cl_ext.h" && cp "/usr/local/cuda-10.0/include/CL/cl_gl.h" "$(@D)/cuda/include/CL/cl_gl.h" && cp "/usr/local/cuda-10.0/include/CL/cl_gl_ext.h" "$(@D)/cuda/include/CL/cl_gl_ext.h" && cp "/usr/local/cuda-10.0/include/CL/cl_platform.h" 
"$(@D)/cuda/include/CL/cl_platform.h" && cp "/usr/local/cuda-10.0/include/CL/opencl.h" "$(@D)/cuda/include/CL/opencl.h" && cp "/usr/local/cuda-10.0/include/builtin_types.h" "$(@D)/cuda/include/builtin_types.h" && cp "/usr/local/cuda-10.0/include/channel_descriptor.h" "$(@D)/cuda/include/channel_descriptor.h" && cp "/usr/local/cuda-10.0/include/common_functions.h" "$(@D)/cuda/include/common_functions.h" && cp "/usr/local/cuda-10.0/include/cooperative_groups.h" "$(@D)/cuda/include/cooperative_groups.h" && cp "/usr/local/cuda-10.0/include/cooperative_groups_helpers.h" "$(@D)/cuda/include/cooperative_groups_helpers.h" && cp "/usr/local/cuda-10.0/include/crt/common_functions.h" "$(@D)/cuda/include/crt/common_functions.h" && cp "/usr/local/cuda-10.0/include/crt/device_double_functions.h" "$(@D)/cuda/include/crt/device_double_functions.h" && cp "/usr/local/cuda-10.0/include/crt/device_double_functions.hpp" "$(@D)/cuda/include/crt/device_double_functions.hpp" && cp "/usr/local/cuda-10.0/include/crt/device_functions.h" "$(@D)/cuda/include/crt/device_functions.h" && cp "/usr/local/cuda-10.0/include/crt/device_functions.hpp" "$(@D)/cuda/include/crt/device_functions.hpp" && cp "/usr/local/cuda-10.0/include/crt/func_macro.h" "$(@D)/cuda/include/crt/func_macro.h" && cp "/usr/local/cuda-10.0/include/crt/host_config.h" "$(@D)/cuda/include/crt/host_config.h" && cp "/usr/local/cuda-10.0/include/crt/host_defines.h" "$(@D)/cuda/include/crt/host_defines.h" && cp "/usr/local/cuda-10.0/include/crt/host_runtime.h" "$(@D)/cuda/include/crt/host_runtime.h" && cp "/usr/local/cuda-10.0/include/crt/math_functions.h" "$(@D)/cuda/include/crt/math_functions.h" && cp "/usr/local/cuda-10.0/include/crt/math_functions.hpp" "$(@D)/cuda/include/crt/math_functions.hpp" && cp "/usr/local/cuda-10.0/include/crt/mma.h" "$(@D)/cuda/include/crt/mma.h" && cp "/usr/local/cuda-10.0/include/crt/mma.hpp" "$(@D)/cuda/include/crt/mma.hpp" && cp "/usr/local/cuda-10.0/include/crt/nvfunctional" 
"$(@D)/cuda/include/crt/nvfunctional" && cp "/usr/local/cuda-10.0/include/crt/sm_70_rt.h" "$(@D)/cuda/include/crt/sm_70_rt.h" && cp "/usr/local/cuda-10.0/include/crt/sm_70_rt.hpp" "$(@D)/cuda/include/crt/sm_70_rt.hpp" && cp "/usr/local/cuda-10.0/include/crt/storage_class.h" "$(@D)/cuda/include/crt/storage_class.h" && cp "/usr/local/cuda-10.0/include/cuComplex.h" "$(@D)/cuda/include/cuComplex.h" && cp "/usr/local/cuda-10.0/include/cublas.h" "$(@D)/cuda/include/cublas.h" && cp "/usr/local/cuda-10.0/include/cublasXt.h" "$(@D)/cuda/include/cublasXt.h" && cp "/usr/local/cuda-10.0/include/cublas_api.h" "$(@D)/cuda/include/cublas_api.h" && cp "/usr/local/cuda-10.0/include/cublas_v2.h" "$(@D)/cuda/include/cublas_v2.h" && cp "/usr/local/cuda-10.0/include/cuda.h" "$(@D)/cuda/include/cuda.h" && cp "/usr/local/cuda-10.0/include/cudaEGL.h" "$(@D)/cuda/include/cudaEGL.h" && cp "/usr/local/cuda-10.0/include/cudaGL.h" "$(@D)/cuda/include/cudaGL.h" && cp "/usr/local/cuda-10.0/include/cudaProfiler.h" "$(@D)/cuda/include/cudaProfiler.h" && cp "/usr/local/cuda-10.0/include/cudaVDPAU.h" "$(@D)/cuda/include/cudaVDPAU.h" && cp "/usr/local/cuda-10.0/include/cuda_device_runtime_api.h" "$(@D)/cuda/include/cuda_device_runtime_api.h" && cp "/usr/local/cuda-10.0/include/cuda_fp16.h" "$(@D)/cuda/include/cuda_fp16.h" && cp "/usr/local/cuda-10.0/include/cuda_fp16.hpp" "$(@D)/cuda/include/cuda_fp16.hpp" && cp "/usr/local/cuda-10.0/include/cuda_gl_interop.h" "$(@D)/cuda/include/cuda_gl_interop.h" && cp "/usr/local/cuda-10.0/include/cuda_occupancy.h" "$(@D)/cuda/include/cuda_occupancy.h" && cp "/usr/local/cuda-10.0/include/cuda_profiler_api.h" "$(@D)/cuda/include/cuda_profiler_api.h" && cp "/usr/local/cuda-10.0/include/cuda_runtime.h" "$(@D)/cuda/include/cuda_runtime.h" && cp "/usr/local/cuda-10.0/include/cuda_runtime_api.h" "$(@D)/cuda/include/cuda_runtime_api.h" && cp "/usr/local/cuda-10.0/include/cuda_surface_types.h" "$(@D)/cuda/include/cuda_surface_types.h" && cp 
"/usr/local/cuda-10.0/include/cuda_texture_types.h" "$(@D)/cuda/include/cuda_texture_types.h" && cp "/usr/local/cuda-10.0/include/cuda_vdpau_interop.h" "$(@D)/cuda/include/cuda_vdpau_interop.h" && cp "/usr/local/cuda-10.0/include/cudalibxt.h" "$(@D)/cuda/include/cudalibxt.h" && cp "/usr/local/cuda-10.0/include/cudnn.h" "$(@D)/cuda/include/cudnn.h" && cp "/usr/local/cuda-10.0/include/cufft.h" "$(@D)/cuda/include/cufft.h" && cp "/usr/local/cuda-10.0/include/cufftXt.h" "$(@D)/cuda/include/cufftXt.h" && cp "/usr/local/cuda-10.0/include/cufftw.h" "$(@D)/cuda/include/cufftw.h" && cp "/usr/local/cuda-10.0/include/curand.h" "$(@D)/cuda/include/curand.h" && cp "/usr/local/cuda-10.0/include/curand_discrete.h" "$(@D)/cuda/include/curand_discrete.h" && cp "/usr/local/cuda-10.0/include/curand_discrete2.h" "$(@D)/cuda/include/curand_discrete2.h" && cp "/usr/local/cuda-10.0/include/curand_globals.h" "$(@D)/cuda/include/curand_globals.h" && cp "/usr/local/cuda-10.0/include/curand_kernel.h" "$(@D)/cuda/include/curand_kernel.h" && cp "/usr/local/cuda-10.0/include/curand_lognormal.h" "$(@D)/cuda/include/curand_lognormal.h" && cp "/usr/local/cuda-10.0/include/curand_mrg32k3a.h" "$(@D)/cuda/include/curand_mrg32k3a.h" && cp "/usr/local/cuda-10.0/include/curand_mtgp32.h" "$(@D)/cuda/include/curand_mtgp32.h" && cp "/usr/local/cuda-10.0/include/curand_mtgp32_host.h" "$(@D)/cuda/include/curand_mtgp32_host.h" && cp "/usr/local/cuda-10.0/include/curand_mtgp32_kernel.h" "$(@D)/cuda/include/curand_mtgp32_kernel.h" && cp "/usr/local/cuda-10.0/include/curand_mtgp32dc_p_11213.h" "$(@D)/cuda/include/curand_mtgp32dc_p_11213.h" && cp "/usr/local/cuda-10.0/include/curand_normal.h" "$(@D)/cuda/include/curand_normal.h" && cp "/usr/local/cuda-10.0/include/curand_normal_static.h" "$(@D)/cuda/include/curand_normal_static.h" && cp "/usr/local/cuda-10.0/include/curand_philox4x32_x.h" "$(@D)/cuda/include/curand_philox4x32_x.h" && cp "/usr/local/cuda-10.0/include/curand_poisson.h" 
"$(@D)/cuda/include/curand_poisson.h" && cp "/usr/local/cuda-10.0/include/curand_precalc.h" "$(@D)/cuda/include/curand_precalc.h" && cp "/usr/local/cuda-10.0/include/curand_uniform.h" "$(@D)/cuda/include/curand_uniform.h" && cp "/usr/local/cuda-10.0/include/cusolverDn.h" "$(@D)/cuda/include/cusolverDn.h" && cp "/usr/local/cuda-10.0/include/cusolverRf.h" "$(@D)/cuda/include/cusolverRf.h" && cp "/usr/local/cuda-10.0/include/cusolverSp.h" "$(@D)/cuda/include/cusolverSp.h" && cp "/usr/local/cuda-10.0/include/cusolverSp_LOWLEVEL_PREVIEW.h" "$(@D)/cuda/include/cusolverSp_LOWLEVEL_PREVIEW.h" && cp "/usr/local/cuda-10.0/include/cusolver_common.h" "$(@D)/cuda/include/cusolver_common.h" && cp "/usr/local/cuda-10.0/include/cusparse.h" "$(@D)/cuda/include/cusparse.h" && cp "/usr/local/cuda-10.0/include/cusparse_v2.h" "$(@D)/cuda/include/cusparse_v2.h" && cp "/usr/local/cuda-10.0/include/device_atomic_functions.h" "$(@D)/cuda/include/device_atomic_functions.h" && cp "/usr/local/cuda-10.0/include/device_atomic_functions.hpp" "$(@D)/cuda/include/device_atomic_functions.hpp" && cp "/usr/local/cuda-10.0/include/device_double_functions.h" "$(@D)/cuda/include/device_double_functions.h" && cp "/usr/local/cuda-10.0/include/device_double_functions.hpp" "$(@D)/cuda/include/device_double_functions.hpp" && cp "/usr/local/cuda-10.0/include/device_functions.h" "$(@D)/cuda/include/device_functions.h" && cp "/usr/local/cuda-10.0/include/device_functions.hpp" "$(@D)/cuda/include/device_functions.hpp" && cp "/usr/local/cuda-10.0/include/device_functions_decls.h" "$(@D)/cuda/include/device_functions_decls.h" && cp "/usr/local/cuda-10.0/include/device_launch_parameters.h" "$(@D)/cuda/include/device_launch_parameters.h" && cp "/usr/local/cuda-10.0/include/device_types.h" "$(@D)/cuda/include/device_types.h" && cp "/usr/local/cuda-10.0/include/driver_functions.h" "$(@D)/cuda/include/driver_functions.h" && cp "/usr/local/cuda-10.0/include/driver_types.h" "$(@D)/cuda/include/driver_types.h" && cp 
"/usr/local/cuda-10.0/include/dynlink_cuda.h" "$(@D)/cuda/include/dynlink_cuda.h" && cp "/usr/local/cuda-10.0/include/dynlink_cuda_cuda.h" "$(@D)/cuda/include/dynlink_cuda_cuda.h" && cp "/usr/local/cuda-10.0/include/dynlink_cuviddec.h" "$(@D)/cuda/include/dynlink_cuviddec.h" && cp "/usr/local/cuda-10.0/include/dynlink_nvcuvid.h" "$(@D)/cuda/include/dynlink_nvcuvid.h" && cp "/usr/local/cuda-10.0/include/fatBinaryCtl.h" "$(@D)/cuda/include/fatBinaryCtl.h" && cp "/usr/local/cuda-10.0/include/fatbinary.h" "$(@D)/cuda/include/fatbinary.h" && cp "/usr/local/cuda-10.0/include/host_config.h" "$(@D)/cuda/include/host_config.h" && cp "/usr/local/cuda-10.0/include/host_defines.h" "$(@D)/cuda/include/host_defines.h" && cp "/usr/local/cuda-10.0/include/library_types.h" "$(@D)/cuda/include/library_types.h" && cp "/usr/local/cuda-10.0/include/math_constants.h" "$(@D)/cuda/include/math_constants.h" && cp "/usr/local/cuda-10.0/include/math_functions.h" "$(@D)/cuda/include/math_functions.h" && cp "/usr/local/cuda-10.0/include/math_functions.hpp" "$(@D)/cuda/include/math_functions.hpp" && cp "/usr/local/cuda-10.0/include/math_functions_dbl_ptx3.h" "$(@D)/cuda/include/math_functions_dbl_ptx3.h" && cp "/usr/local/cuda-10.0/include/math_functions_dbl_ptx3.hpp" "$(@D)/cuda/include/math_functions_dbl_ptx3.hpp" && cp "/usr/local/cuda-10.0/include/mma.h" "$(@D)/cuda/include/mma.h" && cp "/usr/local/cuda-10.0/include/npp.h" "$(@D)/cuda/include/npp.h" && cp "/usr/local/cuda-10.0/include/nppcore.h" "$(@D)/cuda/include/nppcore.h" && cp "/usr/local/cuda-10.0/include/nppdefs.h" "$(@D)/cuda/include/nppdefs.h" && cp "/usr/local/cuda-10.0/include/nppi.h" "$(@D)/cuda/include/nppi.h" && cp "/usr/local/cuda-10.0/include/nppi_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/nppi_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-10.0/include/nppi_color_conversion.h" "$(@D)/cuda/include/nppi_color_conversion.h" && cp "/usr/local/cuda-10.0/include/nppi_compression_functions.h" 
"$(@D)/cuda/include/nppi_compression_functions.h" && cp "/usr/local/cuda-10.0/include/nppi_computer_vision.h" "$(@D)/cuda/include/nppi_computer_vision.h" && cp "/usr/local/cuda-10.0/include/nppi_data_exchange_and_initialization.h" "$(@D)/cuda/include/nppi_data_exchange_and_initialization.h" && cp "/usr/local/cuda-10.0/include/nppi_filtering_functions.h" "$(@D)/cuda/include/nppi_filtering_functions.h" && cp "/usr/local/cuda-10.0/include/nppi_geometry_transforms.h" "$(@D)/cuda/include/nppi_geometry_transforms.h" && cp "/usr/local/cuda-10.0/include/nppi_linear_transforms.h" "$(@D)/cuda/include/nppi_linear_transforms.h" && cp "/usr/local/cuda-10.0/include/nppi_morphological_operations.h" "$(@D)/cuda/include/nppi_morphological_operations.h" && cp "/usr/local/cuda-10.0/include/nppi_statistics_functions.h" "$(@D)/cuda/include/nppi_statistics_functions.h" && cp "/usr/local/cuda-10.0/include/nppi_support_functions.h" "$(@D)/cuda/include/nppi_support_functions.h" && cp "/usr/local/cuda-10.0/include/nppi_threshold_and_compare_operations.h" "$(@D)/cuda/include/nppi_threshold_and_compare_operations.h" && cp "/usr/local/cuda-10.0/include/npps.h" "$(@D)/cuda/include/npps.h" && cp "/usr/local/cuda-10.0/include/npps_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/npps_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-10.0/include/npps_conversion_functions.h" "$(@D)/cuda/include/npps_conversion_functions.h" && cp "/usr/local/cuda-10.0/include/npps_filtering_functions.h" "$(@D)/cuda/include/npps_filtering_functions.h" && cp "/usr/local/cuda-10.0/include/npps_initialization.h" "$(@D)/cuda/include/npps_initialization.h" && cp "/usr/local/cuda-10.0/include/npps_statistics_functions.h" "$(@D)/cuda/include/npps_statistics_functions.h" && cp "/usr/local/cuda-10.0/include/npps_support_functions.h" "$(@D)/cuda/include/npps_support_functions.h" && cp "/usr/local/cuda-10.0/include/nppversion.h" "$(@D)/cuda/include/nppversion.h" && cp 
"/usr/local/cuda-10.0/include/nvToolsExt.h" "$(@D)/cuda/include/nvToolsExt.h" && cp "/usr/local/cuda-10.0/include/nvToolsExtCuda.h" "$(@D)/cuda/include/nvToolsExtCuda.h" && cp "/usr/local/cuda-10.0/include/nvToolsExtCudaRt.h" "$(@D)/cuda/include/nvToolsExtCudaRt.h" && cp "/usr/local/cuda-10.0/include/nvToolsExtMeta.h" "$(@D)/cuda/include/nvToolsExtMeta.h" && cp "/usr/local/cuda-10.0/include/nvToolsExtSync.h" "$(@D)/cuda/include/nvToolsExtSync.h" && cp "/usr/local/cuda-10.0/include/nvblas.h" "$(@D)/cuda/include/nvblas.h" && cp "/usr/local/cuda-10.0/include/nvfunctional" "$(@D)/cuda/include/nvfunctional" && cp "/usr/local/cuda-10.0/include/nvgraph.h" "$(@D)/cuda/include/nvgraph.h" && cp "/usr/local/cuda-10.0/include/nvml.h" "$(@D)/cuda/include/nvml.h" && cp "/usr/local/cuda-10.0/include/nvrtc.h" "$(@D)/cuda/include/nvrtc.h" && cp "/usr/local/cuda-10.0/include/sm_20_atomic_functions.h" "$(@D)/cuda/include/sm_20_atomic_functions.h" && cp "/usr/local/cuda-10.0/include/sm_20_atomic_functions.hpp" "$(@D)/cuda/include/sm_20_atomic_functions.hpp" && cp "/usr/local/cuda-10.0/include/sm_20_intrinsics.h" "$(@D)/cuda/include/sm_20_intrinsics.h" && cp "/usr/local/cuda-10.0/include/sm_20_intrinsics.hpp" "$(@D)/cuda/include/sm_20_intrinsics.hpp" && cp "/usr/local/cuda-10.0/include/sm_30_intrinsics.h" "$(@D)/cuda/include/sm_30_intrinsics.h" && cp "/usr/local/cuda-10.0/include/sm_30_intrinsics.hpp" "$(@D)/cuda/include/sm_30_intrinsics.hpp" && cp "/usr/local/cuda-10.0/include/sm_32_atomic_functions.h" "$(@D)/cuda/include/sm_32_atomic_functions.h" && cp "/usr/local/cuda-10.0/include/sm_32_atomic_functions.hpp" "$(@D)/cuda/include/sm_32_atomic_functions.hpp" && cp "/usr/local/cuda-10.0/include/sm_32_intrinsics.h" "$(@D)/cuda/include/sm_32_intrinsics.h" && cp "/usr/local/cuda-10.0/include/sm_32_intrinsics.hpp" "$(@D)/cuda/include/sm_32_intrinsics.hpp" && cp "/usr/local/cuda-10.0/include/sm_35_atomic_functions.h" "$(@D)/cuda/include/sm_35_atomic_functions.h" && cp 
"/usr/local/cuda-10.0/include/sm_35_intrinsics.h" "$(@D)/cuda/include/sm_35_intrinsics.h" && cp "/usr/local/cuda-10.0/include/sm_60_atomic_functions.h" "$(@D)/cuda/include/sm_60_atomic_functions.h" && cp "/usr/local/cuda-10.0/include/sm_60_atomic_functions.hpp" "$(@D)/cuda/include/sm_60_atomic_functions.hpp" && cp "/usr/local/cuda-10.0/include/sm_61_intrinsics.h" "$(@D)/cuda/include/sm_61_intrinsics.h" && cp "/usr/local/cuda-10.0/include/sm_61_intrinsics.hpp" "$(@D)/cuda/include/sm_61_intrinsics.hpp" && cp "/usr/local/cuda-10.0/include/sobol_direction_vectors.h" "$(@D)/cuda/include/sobol_direction_vectors.h" && cp "/usr/local/cuda-10.0/include/surface_functions.h" "$(@D)/cuda/include/surface_functions.h" && cp "/usr/local/cuda-10.0/include/surface_functions.hpp" "$(@D)/cuda/include/surface_functions.hpp" && cp "/usr/local/cuda-10.0/include/surface_indirect_functions.h" "$(@D)/cuda/include/surface_indirect_functions.h" && cp "/usr/local/cuda-10.0/include/surface_indirect_functions.hpp" "$(@D)/cuda/include/surface_indirect_functions.hpp" && cp "/usr/local/cuda-10.0/include/surface_types.h" "$(@D)/cuda/include/surface_types.h" && cp "/usr/local/cuda-10.0/include/texture_fetch_functions.h" "$(@D)/cuda/include/texture_fetch_functions.h" && cp "/usr/local/cuda-10.0/include/texture_fetch_functions.hpp" "$(@D)/cuda/include/texture_fetch_functions.hpp" && cp "/usr/local/cuda-10.0/include/texture_indirect_functions.h" "$(@D)/cuda/include/texture_indirect_functions.h" && cp "/usr/local/cuda-10.0/include/texture_indirect_functions.hpp" "$(@D)/cuda/include/texture_indirect_functions.hpp" && cp "/usr/local/cuda-10.0/include/texture_types.h" "$(@D)/cuda/include/texture_types.h" && cp "/usr/local/cuda-10.0/include/thrust/adjacent_difference.h" "$(@D)/cuda/include/thrust/adjacent_difference.h" && cp "/usr/local/cuda-10.0/include/thrust/advance.h" "$(@D)/cuda/include/thrust/advance.h" && cp "/usr/local/cuda-10.0/include/thrust/binary_search.h" 
"$(@D)/cuda/include/thrust/binary_search.h" && cp "/usr/local/cuda-10.0/include/thrust/complex.h" "$(@D)/cuda/include/thrust/complex.h" && cp "/usr/local/cuda-10.0/include/thrust/copy.h" "$(@D)/cuda/include/thrust/copy.h" && cp "/usr/local/cuda-10.0/include/thrust/count.h" "$(@D)/cuda/include/thrust/count.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/adjacent_difference.inl" "$(@D)/cuda/include/thrust/detail/adjacent_difference.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/advance.inl" "$(@D)/cuda/include/thrust/detail/advance.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/allocator_traits.h" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/allocator_traits.inl" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/copy_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/copy_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/default_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/default_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/destroy_range.h" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/destroy_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/fill_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/fill_construct_range.inl" 
"$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/malloc_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/malloc_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/no_throw_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/no_throw_allocator.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/tagged_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/tagged_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/temporary_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/allocator/temporary_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/binary_search.inl" "$(@D)/cuda/include/thrust/detail/binary_search.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/arithmetic.h" "$(@D)/cuda/include/thrust/detail/complex/arithmetic.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/c99math.h" "$(@D)/cuda/include/thrust/detail/complex/c99math.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/catrig.h" "$(@D)/cuda/include/thrust/detail/complex/catrig.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/catrigf.h" "$(@D)/cuda/include/thrust/detail/complex/catrigf.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/ccosh.h" "$(@D)/cuda/include/thrust/detail/complex/ccosh.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/ccoshf.h" "$(@D)/cuda/include/thrust/detail/complex/ccoshf.h" && cp 
"/usr/local/cuda-10.0/include/thrust/detail/complex/cexp.h" "$(@D)/cuda/include/thrust/detail/complex/cexp.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/cexpf.h" "$(@D)/cuda/include/thrust/detail/complex/cexpf.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/clog.h" "$(@D)/cuda/include/thrust/detail/complex/clog.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/clogf.h" "$(@D)/cuda/include/thrust/detail/complex/clogf.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/complex.inl" "$(@D)/cuda/include/thrust/detail/complex/complex.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/cpow.h" "$(@D)/cuda/include/thrust/detail/complex/cpow.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/cpowf.h" "$(@D)/cuda/include/thrust/detail/complex/cpowf.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/cproj.h" "$(@D)/cuda/include/thrust/detail/complex/cproj.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/csinh.h" "$(@D)/cuda/include/thrust/detail/complex/csinh.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/csinhf.h" "$(@D)/cuda/include/thrust/detail/complex/csinhf.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/csqrt.h" "$(@D)/cuda/include/thrust/detail/complex/csqrt.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/csqrtf.h" "$(@D)/cuda/include/thrust/detail/complex/csqrtf.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/ctanh.h" "$(@D)/cuda/include/thrust/detail/complex/ctanh.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/ctanhf.h" "$(@D)/cuda/include/thrust/detail/complex/ctanhf.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/math_private.h" "$(@D)/cuda/include/thrust/detail/complex/math_private.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/complex/stream.h" "$(@D)/cuda/include/thrust/detail/complex/stream.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config.h" 
"$(@D)/cuda/include/thrust/detail/config.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/compiler.h" "$(@D)/cuda/include/thrust/detail/config/compiler.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/compiler_fence.h" "$(@D)/cuda/include/thrust/detail/config/compiler_fence.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/config.h" "$(@D)/cuda/include/thrust/detail/config/config.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/debug.h" "$(@D)/cuda/include/thrust/detail/config/debug.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/device_system.h" "$(@D)/cuda/include/thrust/detail/config/device_system.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/exec_check_disable.h" "$(@D)/cuda/include/thrust/detail/config/exec_check_disable.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/forceinline.h" "$(@D)/cuda/include/thrust/detail/config/forceinline.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/global_workarounds.h" "$(@D)/cuda/include/thrust/detail/config/global_workarounds.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/host_device.h" "$(@D)/cuda/include/thrust/detail/config/host_device.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/host_system.h" "$(@D)/cuda/include/thrust/detail/config/host_system.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/config/simple_defines.h" "$(@D)/cuda/include/thrust/detail/config/simple_defines.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/contiguous_storage.h" "$(@D)/cuda/include/thrust/detail/contiguous_storage.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/contiguous_storage.inl" "$(@D)/cuda/include/thrust/detail/contiguous_storage.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/copy.h" "$(@D)/cuda/include/thrust/detail/copy.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/copy.inl" "$(@D)/cuda/include/thrust/detail/copy.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/copy_if.h" 
"$(@D)/cuda/include/thrust/detail/copy_if.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/copy_if.inl" "$(@D)/cuda/include/thrust/detail/copy_if.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/count.inl" "$(@D)/cuda/include/thrust/detail/count.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/cstdint.h" "$(@D)/cuda/include/thrust/detail/cstdint.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/device_delete.inl" "$(@D)/cuda/include/thrust/detail/device_delete.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/device_free.inl" "$(@D)/cuda/include/thrust/detail/device_free.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/device_malloc.inl" "$(@D)/cuda/include/thrust/detail/device_malloc.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/device_new.inl" "$(@D)/cuda/include/thrust/detail/device_new.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/device_ptr.inl" "$(@D)/cuda/include/thrust/detail/device_ptr.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/device_reference.inl" "$(@D)/cuda/include/thrust/detail/device_reference.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/device_vector.inl" "$(@D)/cuda/include/thrust/detail/device_vector.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/dispatch/is_trivial_copy.h" "$(@D)/cuda/include/thrust/detail/dispatch/is_trivial_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/distance.inl" "$(@D)/cuda/include/thrust/detail/distance.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/equal.inl" "$(@D)/cuda/include/thrust/detail/equal.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/execute_with_allocator.h" "$(@D)/cuda/include/thrust/detail/execute_with_allocator.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/execution_policy.h" "$(@D)/cuda/include/thrust/detail/execution_policy.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/extrema.inl" "$(@D)/cuda/include/thrust/detail/extrema.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/fill.inl" 
"$(@D)/cuda/include/thrust/detail/fill.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/find.inl" "$(@D)/cuda/include/thrust/detail/find.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/for_each.inl" "$(@D)/cuda/include/thrust/detail/for_each.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/function.h" "$(@D)/cuda/include/thrust/detail/function.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional.inl" "$(@D)/cuda/include/thrust/detail/functional.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/actor.h" "$(@D)/cuda/include/thrust/detail/functional/actor.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/actor.inl" "$(@D)/cuda/include/thrust/detail/functional/actor.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/argument.h" "$(@D)/cuda/include/thrust/detail/functional/argument.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/composite.h" "$(@D)/cuda/include/thrust/detail/functional/composite.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/operators/arithmetic_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/arithmetic_operators.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/operators/assignment_operator.h" "$(@D)/cuda/include/thrust/detail/functional/operators/assignment_operator.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/operators/bitwise_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/bitwise_operators.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/operators/compound_assignment_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/compound_assignment_operators.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/operators/logical_operators.h" 
"$(@D)/cuda/include/thrust/detail/functional/operators/logical_operators.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/operators/operator_adaptors.h" "$(@D)/cuda/include/thrust/detail/functional/operators/operator_adaptors.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/operators/relational_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/relational_operators.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/placeholder.h" "$(@D)/cuda/include/thrust/detail/functional/placeholder.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/functional/value.h" "$(@D)/cuda/include/thrust/detail/functional/value.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/gather.inl" "$(@D)/cuda/include/thrust/detail/gather.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/generate.inl" "$(@D)/cuda/include/thrust/detail/generate.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/get_iterator_value.h" "$(@D)/cuda/include/thrust/detail/get_iterator_value.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/host_vector.inl" "$(@D)/cuda/include/thrust/detail/host_vector.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/inner_product.inl" "$(@D)/cuda/include/thrust/detail/inner_product.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/integer_math.h" "$(@D)/cuda/include/thrust/detail/integer_math.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/integer_traits.h" "$(@D)/cuda/include/thrust/detail/integer_traits.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/internal_functional.h" "$(@D)/cuda/include/thrust/detail/internal_functional.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/logical.inl" "$(@D)/cuda/include/thrust/detail/logical.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/detail/malloc_and_free.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/merge.inl" "$(@D)/cuda/include/thrust/detail/merge.inl" && cp 
"/usr/local/cuda-10.0/include/thrust/detail/minmax.h" "$(@D)/cuda/include/thrust/detail/minmax.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/mismatch.inl" "$(@D)/cuda/include/thrust/detail/mismatch.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/mpl/math.h" "$(@D)/cuda/include/thrust/detail/mpl/math.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/numeric_traits.h" "$(@D)/cuda/include/thrust/detail/numeric_traits.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/overlapped_copy.h" "$(@D)/cuda/include/thrust/detail/overlapped_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/pair.inl" "$(@D)/cuda/include/thrust/detail/pair.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/partition.inl" "$(@D)/cuda/include/thrust/detail/partition.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/pointer.h" "$(@D)/cuda/include/thrust/detail/pointer.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/pointer.inl" "$(@D)/cuda/include/thrust/detail/pointer.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/range/head_flags.h" "$(@D)/cuda/include/thrust/detail/range/head_flags.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/range/tail_flags.h" "$(@D)/cuda/include/thrust/detail/range/tail_flags.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/raw_pointer_cast.h" "$(@D)/cuda/include/thrust/detail/raw_pointer_cast.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/raw_reference_cast.h" "$(@D)/cuda/include/thrust/detail/raw_reference_cast.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/reduce.inl" "$(@D)/cuda/include/thrust/detail/reduce.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/reference.h" "$(@D)/cuda/include/thrust/detail/reference.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/reference.inl" "$(@D)/cuda/include/thrust/detail/reference.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/reference_forward_declaration.h" "$(@D)/cuda/include/thrust/detail/reference_forward_declaration.h" && cp 
"/usr/local/cuda-10.0/include/thrust/detail/remove.inl" "$(@D)/cuda/include/thrust/detail/remove.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/replace.inl" "$(@D)/cuda/include/thrust/detail/replace.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/reverse.inl" "$(@D)/cuda/include/thrust/detail/reverse.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/scan.inl" "$(@D)/cuda/include/thrust/detail/scan.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/scatter.inl" "$(@D)/cuda/include/thrust/detail/scatter.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/seq.h" "$(@D)/cuda/include/thrust/detail/seq.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/sequence.inl" "$(@D)/cuda/include/thrust/detail/sequence.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/set_operations.inl" "$(@D)/cuda/include/thrust/detail/set_operations.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/sort.inl" "$(@D)/cuda/include/thrust/detail/sort.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/static_assert.h" "$(@D)/cuda/include/thrust/detail/static_assert.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/static_map.h" "$(@D)/cuda/include/thrust/detail/static_map.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/swap.h" "$(@D)/cuda/include/thrust/detail/swap.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/swap.inl" "$(@D)/cuda/include/thrust/detail/swap.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/swap_ranges.inl" "$(@D)/cuda/include/thrust/detail/swap_ranges.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/tabulate.inl" "$(@D)/cuda/include/thrust/detail/tabulate.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/temporary_array.h" "$(@D)/cuda/include/thrust/detail/temporary_array.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/temporary_array.inl" "$(@D)/cuda/include/thrust/detail/temporary_array.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/temporary_buffer.h" 
"$(@D)/cuda/include/thrust/detail/temporary_buffer.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/transform.inl" "$(@D)/cuda/include/thrust/detail/transform.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/transform_reduce.inl" "$(@D)/cuda/include/thrust/detail/transform_reduce.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/transform_scan.inl" "$(@D)/cuda/include/thrust/detail/transform_scan.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/trivial_sequence.h" "$(@D)/cuda/include/thrust/detail/trivial_sequence.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/tuple.inl" "$(@D)/cuda/include/thrust/detail/tuple.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/tuple_meta_transform.h" "$(@D)/cuda/include/thrust/detail/tuple_meta_transform.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/tuple_transform.h" "$(@D)/cuda/include/thrust/detail/tuple_transform.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" "$(@D)/cuda/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/function_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/function_traits.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/has_member_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_member_function.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/has_nested_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_nested_type.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/has_trivial_assign.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_trivial_assign.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/is_call_possible.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_call_possible.h" && cp 
"/usr/local/cuda-10.0/include/thrust/detail/type_traits/is_metafunction_defined.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_metafunction_defined.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/iterator/is_output_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_output_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/minimum_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/minimum_type.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/pointer_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/pointer_traits.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/type_traits/result_of_adaptable_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/result_of_adaptable_function.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_copy.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_fill.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/unique.inl" "$(@D)/cuda/include/thrust/detail/unique.inl" && cp "/usr/local/cuda-10.0/include/thrust/detail/use_default.h" "$(@D)/cuda/include/thrust/detail/use_default.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/util/align.h" "$(@D)/cuda/include/thrust/detail/util/align.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/util/blocking.h" "$(@D)/cuda/include/thrust/detail/util/blocking.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/vector_base.h" "$(@D)/cuda/include/thrust/detail/vector_base.h" && cp "/usr/local/cuda-10.0/include/thrust/detail/vector_base.inl" "$(@D)/cuda/include/thrust/detail/vector_base.inl" && cp "/usr/local/cuda-10.0/include/thrust/device_allocator.h" 
"$(@D)/cuda/include/thrust/device_allocator.h" && cp "/usr/local/cuda-10.0/include/thrust/device_delete.h" "$(@D)/cuda/include/thrust/device_delete.h" && cp "/usr/local/cuda-10.0/include/thrust/device_free.h" "$(@D)/cuda/include/thrust/device_free.h" && cp "/usr/local/cuda-10.0/include/thrust/device_malloc.h" "$(@D)/cuda/include/thrust/device_malloc.h" && cp "/usr/local/cuda-10.0/include/thrust/device_malloc_allocator.h" "$(@D)/cuda/include/thrust/device_malloc_allocator.h" && cp "/usr/local/cuda-10.0/include/thrust/device_new.h" "$(@D)/cuda/include/thrust/device_new.h" && cp "/usr/local/cuda-10.0/include/thrust/device_new_allocator.h" "$(@D)/cuda/include/thrust/device_new_allocator.h" && cp "/usr/local/cuda-10.0/include/thrust/device_ptr.h" "$(@D)/cuda/include/thrust/device_ptr.h" && cp "/usr/local/cuda-10.0/include/thrust/device_reference.h" "$(@D)/cuda/include/thrust/device_reference.h" && cp "/usr/local/cuda-10.0/include/thrust/device_vector.h" "$(@D)/cuda/include/thrust/device_vector.h" && cp "/usr/local/cuda-10.0/include/thrust/distance.h" "$(@D)/cuda/include/thrust/distance.h" && cp "/usr/local/cuda-10.0/include/thrust/equal.h" "$(@D)/cuda/include/thrust/equal.h" && cp "/usr/local/cuda-10.0/include/thrust/execution_policy.h" "$(@D)/cuda/include/thrust/execution_policy.h" && cp "/usr/local/cuda-10.0/include/thrust/extrema.h" "$(@D)/cuda/include/thrust/extrema.h" && cp "/usr/local/cuda-10.0/include/thrust/fill.h" "$(@D)/cuda/include/thrust/fill.h" && cp "/usr/local/cuda-10.0/include/thrust/find.h" "$(@D)/cuda/include/thrust/find.h" && cp "/usr/local/cuda-10.0/include/thrust/for_each.h" "$(@D)/cuda/include/thrust/for_each.h" && cp "/usr/local/cuda-10.0/include/thrust/functional.h" "$(@D)/cuda/include/thrust/functional.h" && cp "/usr/local/cuda-10.0/include/thrust/gather.h" "$(@D)/cuda/include/thrust/gather.h" && cp "/usr/local/cuda-10.0/include/thrust/generate.h" "$(@D)/cuda/include/thrust/generate.h" && cp "/usr/local/cuda-10.0/include/thrust/host_vector.h" 
"$(@D)/cuda/include/thrust/host_vector.h" && cp "/usr/local/cuda-10.0/include/thrust/inner_product.h" "$(@D)/cuda/include/thrust/inner_product.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/constant_iterator.h" "$(@D)/cuda/include/thrust/iterator/constant_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/counting_iterator.h" "$(@D)/cuda/include/thrust/iterator/counting_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/any_assign.h" "$(@D)/cuda/include/thrust/iterator/detail/any_assign.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/any_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/any_system_tag.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/constant_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/constant_iterator_base.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/counting_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/counting_iterator.inl" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/device_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/device_system_tag.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/discard_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/discard_iterator_base.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/distance_from_result.h" "$(@D)/cuda/include/thrust/iterator/detail/distance_from_result.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/host_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/host_system_tag.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/is_iterator_category.h" "$(@D)/cuda/include/thrust/iterator/detail/is_iterator_category.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/is_trivial_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/is_trivial_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/iterator_adaptor_base.h" 
"$(@D)/cuda/include/thrust/iterator/detail/iterator_adaptor_base.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/iterator_category_to_system.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_system.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/iterator_category_to_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_traversal.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/iterator_facade_category.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_facade_category.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/iterator_traits.inl" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traits.inl" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/iterator_traversal_tags.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traversal_tags.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/join_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/join_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/minimum_category.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_category.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/minimum_system.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_system.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/normal_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/normal_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/permutation_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/permutation_iterator_base.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/retag.h" "$(@D)/cuda/include/thrust/iterator/detail/retag.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/reverse_iterator.inl" 
"$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator.inl" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/reverse_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator_base.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/tagged_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/tagged_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/transform_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/transform_iterator.inl" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/transform_output_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/transform_output_iterator.inl" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/tuple_of_iterator_references.h" "$(@D)/cuda/include/thrust/iterator/detail/tuple_of_iterator_references.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/universal_categories.h" "$(@D)/cuda/include/thrust/iterator/detail/universal_categories.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/zip_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator.inl" && cp "/usr/local/cuda-10.0/include/thrust/iterator/detail/zip_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator_base.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/discard_iterator.h" "$(@D)/cuda/include/thrust/iterator/discard_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/iterator_adaptor.h" "$(@D)/cuda/include/thrust/iterator/iterator_adaptor.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/iterator_categories.h" "$(@D)/cuda/include/thrust/iterator/iterator_categories.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/iterator_facade.h" "$(@D)/cuda/include/thrust/iterator/iterator_facade.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/iterator_traits.h" "$(@D)/cuda/include/thrust/iterator/iterator_traits.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/permutation_iterator.h" 
"$(@D)/cuda/include/thrust/iterator/permutation_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/retag.h" "$(@D)/cuda/include/thrust/iterator/retag.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/reverse_iterator.h" "$(@D)/cuda/include/thrust/iterator/reverse_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/transform_iterator.h" "$(@D)/cuda/include/thrust/iterator/transform_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/transform_output_iterator.h" "$(@D)/cuda/include/thrust/iterator/transform_output_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/iterator/zip_iterator.h" "$(@D)/cuda/include/thrust/iterator/zip_iterator.h" && cp "/usr/local/cuda-10.0/include/thrust/logical.h" "$(@D)/cuda/include/thrust/logical.h" && cp "/usr/local/cuda-10.0/include/thrust/memory.h" "$(@D)/cuda/include/thrust/memory.h" && cp "/usr/local/cuda-10.0/include/thrust/merge.h" "$(@D)/cuda/include/thrust/merge.h" && cp "/usr/local/cuda-10.0/include/thrust/mismatch.h" "$(@D)/cuda/include/thrust/mismatch.h" && cp "/usr/local/cuda-10.0/include/thrust/pair.h" "$(@D)/cuda/include/thrust/pair.h" && cp "/usr/local/cuda-10.0/include/thrust/partition.h" "$(@D)/cuda/include/thrust/partition.h" && cp "/usr/local/cuda-10.0/include/thrust/random.h" "$(@D)/cuda/include/thrust/random.h" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/discard_block_engine.inl" "$(@D)/cuda/include/thrust/random/detail/discard_block_engine.inl" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/linear_congruential_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine.inl" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/linear_congruential_engine_discard.h" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine_discard.h" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/linear_feedback_shift_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine.inl" && cp 
"/usr/local/cuda-10.0/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/mod.h" "$(@D)/cuda/include/thrust/random/detail/mod.h" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/normal_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/normal_distribution.inl" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/normal_distribution_base.h" "$(@D)/cuda/include/thrust/random/detail/normal_distribution_base.h" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/random_core_access.h" "$(@D)/cuda/include/thrust/random/detail/random_core_access.h" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/subtract_with_carry_engine.inl" "$(@D)/cuda/include/thrust/random/detail/subtract_with_carry_engine.inl" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/uniform_int_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_int_distribution.inl" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/uniform_real_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_real_distribution.inl" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/xor_combine_engine.inl" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine.inl" && cp "/usr/local/cuda-10.0/include/thrust/random/detail/xor_combine_engine_max.h" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine_max.h" && cp "/usr/local/cuda-10.0/include/thrust/random/discard_block_engine.h" "$(@D)/cuda/include/thrust/random/discard_block_engine.h" && cp "/usr/local/cuda-10.0/include/thrust/random/linear_congruential_engine.h" "$(@D)/cuda/include/thrust/random/linear_congruential_engine.h" && cp "/usr/local/cuda-10.0/include/thrust/random/linear_feedback_shift_engine.h" "$(@D)/cuda/include/thrust/random/linear_feedback_shift_engine.h" && cp 
"/usr/local/cuda-10.0/include/thrust/random/normal_distribution.h" "$(@D)/cuda/include/thrust/random/normal_distribution.h" && cp "/usr/local/cuda-10.0/include/thrust/random/subtract_with_carry_engine.h" "$(@D)/cuda/include/thrust/random/subtract_with_carry_engine.h" && cp "/usr/local/cuda-10.0/include/thrust/random/uniform_int_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_int_distribution.h" && cp "/usr/local/cuda-10.0/include/thrust/random/uniform_real_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_real_distribution.h" && cp "/usr/local/cuda-10.0/include/thrust/random/xor_combine_engine.h" "$(@D)/cuda/include/thrust/random/xor_combine_engine.h" && cp "/usr/local/cuda-10.0/include/thrust/reduce.h" "$(@D)/cuda/include/thrust/reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/remove.h" "$(@D)/cuda/include/thrust/remove.h" && cp "/usr/local/cuda-10.0/include/thrust/replace.h" "$(@D)/cuda/include/thrust/replace.h" && cp "/usr/local/cuda-10.0/include/thrust/reverse.h" "$(@D)/cuda/include/thrust/reverse.h" && cp "/usr/local/cuda-10.0/include/thrust/scan.h" "$(@D)/cuda/include/thrust/scan.h" && cp "/usr/local/cuda-10.0/include/thrust/scatter.h" "$(@D)/cuda/include/thrust/scatter.h" && cp "/usr/local/cuda-10.0/include/thrust/sequence.h" "$(@D)/cuda/include/thrust/sequence.h" && cp "/usr/local/cuda-10.0/include/thrust/set_operations.h" "$(@D)/cuda/include/thrust/set_operations.h" && cp "/usr/local/cuda-10.0/include/thrust/sort.h" "$(@D)/cuda/include/thrust/sort.h" && cp "/usr/local/cuda-10.0/include/thrust/swap.h" "$(@D)/cuda/include/thrust/swap.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/cpp/detail/adjacent_difference.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/assign_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/binary_search.h" 
"$(@D)/cuda/include/thrust/system/cpp/detail/binary_search.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy_if.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/count.h" "$(@D)/cuda/include/thrust/system/cpp/detail/count.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/equal.h" "$(@D)/cuda/include/thrust/system/cpp/detail/equal.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/execution_policy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cpp/detail/extrema.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/find.h" "$(@D)/cuda/include/thrust/system/cpp/detail/find.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cpp/detail/for_each.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/gather.h" "$(@D)/cuda/include/thrust/system/cpp/detail/gather.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/generate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/generate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/get_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cpp/detail/inner_product.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cpp/detail/iter_swap.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/logical.h" "$(@D)/cuda/include/thrust/system/cpp/detail/logical.h" && 
cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cpp/detail/malloc_and_free.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/memory.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/merge.h" "$(@D)/cuda/include/thrust/system/cpp/detail/merge.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cpp/detail/mismatch.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/par.h" "$(@D)/cuda/include/thrust/system/cpp/detail/par.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/partition.h" "$(@D)/cuda/include/thrust/system/cpp/detail/partition.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/remove.h" "$(@D)/cuda/include/thrust/system/cpp/detail/remove.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/replace.h" "$(@D)/cuda/include/thrust/system/cpp/detail/replace.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reverse.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scatter.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sequence.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/cpp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/cpp/detail/set_operations.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/sort.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/cpp/detail/swap_ranges.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/tabulate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cpp/detail/temporary_buffer.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/transform.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/unique.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/vector.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/execution_policy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/memory.h" 
"$(@D)/cuda/include/thrust/system/cpp/memory.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cpp/vector.h" "$(@D)/cuda/include/thrust/system/cpp/vector.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/config.h" "$(@D)/cuda/include/thrust/system/cuda/config.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/cuda/detail/adjacent_difference.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/assign_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cuda/detail/binary_search.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_if.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/core/agent_launcher.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/agent_launcher.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/core/alignment.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/alignment.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/core/triple_chevron_launch.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/triple_chevron_launch.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/core/util.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/util.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/count.h" "$(@D)/cuda/include/thrust/system/cuda/detail/count.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cross_system.h" "$(@D)/cuda/include/thrust/system/cuda/detail/cross_system.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_histogram.cuh" 
&& cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_upsweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_upsweep.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_rle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_rle.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_scan.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_segment_fixup.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_segment_fixup.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_select_if.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_select_if.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_csrt.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_csrt.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_orig.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_orig.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_row_based.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_row_based.cuh" && cp 
"/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/agent/single_pass_scan_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/single_pass_scan_operators.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_adjacent_difference.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_adjacent_difference.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_load.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_shuffle.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_shuffle.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/block_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_store.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans2.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans2.cuh" && cp 
"/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans3.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans3.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/cub.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/cub.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/device_segmented_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_radix_sort.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/device_segmented_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_reduce.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/device_select.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_select.cuh" && cp 
"/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/device_spmv.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_spmv.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_histogram.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_radix_sort.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce_by_key.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce_by_key.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_rle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_rle.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_scan.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_select_if.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_select_if.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_csrt.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_csrt.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_orig.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_orig.cuh" && cp 
"/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_row_based.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_row_based.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/host/mutex.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/host/mutex.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" && cp 
"/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/iterator/discard_output_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/discard_output_iterator.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/thread/thread_search.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_search.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/util_allocator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_allocator.cuh" && cp 
"/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/util_arch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_arch.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/util_debug.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_debug.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/util_device.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_device.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/util_macro.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_macro.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/util_namespace.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_namespace.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/util_ptx.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_ptx.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/util_type.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_type.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" && cp 
"/usr/local/cuda-10.0/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/equal.h" "$(@D)/cuda/include/thrust/system/cuda/detail/equal.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/error.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/error.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/execution_policy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cuda/detail/extrema.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/fill.h" "$(@D)/cuda/include/thrust/system/cuda/detail/fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/find.h" "$(@D)/cuda/include/thrust/system/cuda/detail/find.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cuda/detail/for_each.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/gather.h" "$(@D)/cuda/include/thrust/system/cuda/detail/gather.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/generate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/generate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/get_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/guarded_driver_types.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_driver_types.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cuda/detail/inner_product.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/cuda/detail/internal/copy_cross_system.h" "$(@D)/cuda/include/thrust/system/cuda/detail/internal/copy_cross_system.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/internal/copy_device_to_device.h" "$(@D)/cuda/include/thrust/system/cuda/detail/internal/copy_device_to_device.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cuda/detail/iter_swap.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/logical.h" "$(@D)/cuda/include/thrust/system/cuda/detail/logical.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cuda/detail/malloc_and_free.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/memory.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/memory_buffer.h" "$(@D)/cuda/include/thrust/system/cuda/detail/memory_buffer.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/merge.h" "$(@D)/cuda/include/thrust/system/cuda/detail/merge.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cuda/detail/mismatch.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/par.h" "$(@D)/cuda/include/thrust/system/cuda/detail/par.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/par_to_seq.h" "$(@D)/cuda/include/thrust/system/cuda/detail/par_to_seq.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/parallel_for.h" "$(@D)/cuda/include/thrust/system/cuda/detail/parallel_for.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/partition.h" "$(@D)/cuda/include/thrust/system/cuda/detail/partition.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/cuda/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/remove.h" "$(@D)/cuda/include/thrust/system/cuda/detail/remove.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/replace.h" "$(@D)/cuda/include/thrust/system/cuda/detail/replace.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reverse.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scatter.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sequence.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/cuda/detail/set_operations.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/cuda/detail/swap_ranges.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/tabulate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cuda/detail/temporary_buffer.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/terminate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/terminate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/transform.h" 
"$(@D)/cuda/include/thrust/system/cuda/detail/transform.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/unique.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/util.h" "$(@D)/cuda/include/thrust/system/cuda/detail/util.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/vector.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/error.h" "$(@D)/cuda/include/thrust/system/cuda/error.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/execution_policy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/experimental/pinned_allocator.h" "$(@D)/cuda/include/thrust/system/cuda/experimental/pinned_allocator.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/memory.h" "$(@D)/cuda/include/thrust/system/cuda/memory.h" && cp "/usr/local/cuda-10.0/include/thrust/system/cuda/vector.h" "$(@D)/cuda/include/thrust/system/cuda/vector.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/adl/adjacent_difference.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/detail/adl/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/assign_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/adl/binary_search.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy_if.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/count.h" "$(@D)/cuda/include/thrust/system/detail/adl/count.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/equal.h" "$(@D)/cuda/include/thrust/system/detail/adl/equal.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/extrema.h" "$(@D)/cuda/include/thrust/system/detail/adl/extrema.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/find.h" "$(@D)/cuda/include/thrust/system/detail/adl/find.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/for_each.h" "$(@D)/cuda/include/thrust/system/detail/adl/for_each.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/gather.h" "$(@D)/cuda/include/thrust/system/detail/adl/gather.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/generate.h" "$(@D)/cuda/include/thrust/system/detail/adl/generate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/get_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/get_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/adl/inner_product.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/adl/iter_swap.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/detail/adl/logical.h" "$(@D)/cuda/include/thrust/system/detail/adl/logical.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/adl/malloc_and_free.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/merge.h" "$(@D)/cuda/include/thrust/system/detail/adl/merge.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/adl/mismatch.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/partition.h" "$(@D)/cuda/include/thrust/system/detail/adl/partition.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/remove.h" "$(@D)/cuda/include/thrust/system/detail/adl/remove.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/replace.h" "$(@D)/cuda/include/thrust/system/detail/adl/replace.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/reverse.h" "$(@D)/cuda/include/thrust/system/detail/adl/reverse.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/scatter.h" "$(@D)/cuda/include/thrust/system/detail/adl/scatter.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/sequence.h" "$(@D)/cuda/include/thrust/system/detail/adl/sequence.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/adl/set_operations.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/detail/adl/sort.h" "$(@D)/cuda/include/thrust/system/detail/adl/sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/adl/swap_ranges.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/adl/tabulate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/adl/temporary_buffer.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/transform.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/unique.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/adl/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/bad_alloc.h" "$(@D)/cuda/include/thrust/system/detail/bad_alloc.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/errno.h" "$(@D)/cuda/include/thrust/system/detail/errno.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/error_category.inl" "$(@D)/cuda/include/thrust/system/detail/error_category.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/error_code.inl" 
"$(@D)/cuda/include/thrust/system/detail/error_code.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/error_condition.inl" "$(@D)/cuda/include/thrust/system/detail/error_condition.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/adjacent_difference.inl" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/advance.h" "$(@D)/cuda/include/thrust/system/detail/generic/advance.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/advance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/advance.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/copy_if.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/count.h" "$(@D)/cuda/include/thrust/system/detail/generic/count.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/count.inl" "$(@D)/cuda/include/thrust/system/detail/generic/count.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/distance.h" 
"$(@D)/cuda/include/thrust/system/detail/generic/distance.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/distance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/distance.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/equal.h" "$(@D)/cuda/include/thrust/system/detail/generic/equal.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/equal.inl" "$(@D)/cuda/include/thrust/system/detail/generic/equal.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/extrema.h" "$(@D)/cuda/include/thrust/system/detail/generic/extrema.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/extrema.inl" "$(@D)/cuda/include/thrust/system/detail/generic/extrema.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/find.h" "$(@D)/cuda/include/thrust/system/detail/generic/find.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/find.inl" "$(@D)/cuda/include/thrust/system/detail/generic/find.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/for_each.h" "$(@D)/cuda/include/thrust/system/detail/generic/for_each.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/gather.h" "$(@D)/cuda/include/thrust/system/detail/generic/gather.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/gather.inl" "$(@D)/cuda/include/thrust/system/detail/generic/gather.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/generate.h" "$(@D)/cuda/include/thrust/system/detail/generic/generate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/generate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/generate.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/detail/generic/inner_product.inl" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/logical.h" "$(@D)/cuda/include/thrust/system/detail/generic/logical.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/memory.h" "$(@D)/cuda/include/thrust/system/detail/generic/memory.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/memory.inl" "$(@D)/cuda/include/thrust/system/detail/generic/memory.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/merge.h" "$(@D)/cuda/include/thrust/system/detail/generic/merge.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/merge.inl" "$(@D)/cuda/include/thrust/system/detail/generic/merge.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/mismatch.inl" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/partition.h" "$(@D)/cuda/include/thrust/system/detail/generic/partition.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/partition.inl" "$(@D)/cuda/include/thrust/system/detail/generic/partition.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.inl" && cp 
"/usr/local/cuda-10.0/include/thrust/system/detail/generic/remove.h" "$(@D)/cuda/include/thrust/system/detail/generic/remove.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/remove.inl" "$(@D)/cuda/include/thrust/system/detail/generic/remove.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/replace.h" "$(@D)/cuda/include/thrust/system/detail/generic/replace.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/replace.inl" "$(@D)/cuda/include/thrust/system/detail/generic/replace.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/reverse.h" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/reverse.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/scalar/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/scalar/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/scan_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/scatter.h" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/scatter.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.inl" && cp 
"/usr/local/cuda-10.0/include/thrust/system/detail/generic/select_system.h" "$(@D)/cuda/include/thrust/system/detail/generic/select_system.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/sequence.h" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/sequence.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/generic/set_operations.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/set_operations.inl" "$(@D)/cuda/include/thrust/system/detail/generic/set_operations.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/sort.h" "$(@D)/cuda/include/thrust/system/detail/generic/sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/sort.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sort.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/swap_ranges.inl" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/tabulate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/tag.h" "$(@D)/cuda/include/thrust/system/detail/generic/tag.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/temporary_buffer.inl" 
"$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/transform.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/transform.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/transform_reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/transform_scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/type_traits.h" "$(@D)/cuda/include/thrust/system/detail/generic/type_traits.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/unique.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/unique.inl" 
"$(@D)/cuda/include/thrust/system/detail/generic/unique.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/generic/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/internal/decompose.h" "$(@D)/cuda/include/thrust/system/detail/internal/decompose.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/sequential/adjacent_difference.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/assign_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/sequential/binary_search.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/copy.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/copy_backward.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_backward.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_if.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/count.h" "$(@D)/cuda/include/thrust/system/detail/sequential/count.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/equal.h" "$(@D)/cuda/include/thrust/system/detail/sequential/equal.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/execution_policy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/execution_policy.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/detail/sequential/extrema.h" "$(@D)/cuda/include/thrust/system/detail/sequential/extrema.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/find.h" "$(@D)/cuda/include/thrust/system/detail/sequential/find.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/for_each.h" "$(@D)/cuda/include/thrust/system/detail/sequential/for_each.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/gather.h" "$(@D)/cuda/include/thrust/system/detail/sequential/gather.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/general_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/general_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/generate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/generate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/get_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/get_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/sequential/inner_product.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/insertion_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/insertion_sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/sequential/iter_swap.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/logical.h" "$(@D)/cuda/include/thrust/system/detail/sequential/logical.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/sequential/malloc_and_free.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/merge.h" 
"$(@D)/cuda/include/thrust/system/detail/sequential/merge.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/merge.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/sequential/mismatch.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/partition.h" "$(@D)/cuda/include/thrust/system/detail/sequential/partition.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reduce_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/remove.h" "$(@D)/cuda/include/thrust/system/detail/sequential/remove.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/replace.h" "$(@D)/cuda/include/thrust/system/detail/sequential/replace.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/reverse.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reverse.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/scatter.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scatter.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/sequence.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sequence.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/sequential/set_operations.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/detail/sequential/sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/stable_merge_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/stable_merge_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/stable_primitive_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/stable_primitive_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/stable_radix_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/stable_radix_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/sequential/swap_ranges.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/tabulate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/sequential/temporary_buffer.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/transform.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/transform_reduce.h" 
"$(@D)/cuda/include/thrust/system/detail/sequential/transform_reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform_scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/trivial_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/trivial_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/unique.h" "$(@D)/cuda/include/thrust/system/detail/sequential/unique.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/sequential/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/unique_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/detail/system_error.inl" "$(@D)/cuda/include/thrust/system/detail/system_error.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/error_code.h" "$(@D)/cuda/include/thrust/system/error_code.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/omp/detail/adjacent_difference.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/assign_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/omp/detail/binary_search.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/copy.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/copy_if.h" 
"$(@D)/cuda/include/thrust/system/omp/detail/copy_if.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy_if.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/count.h" "$(@D)/cuda/include/thrust/system/omp/detail/count.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/default_decomposition.h" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/default_decomposition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/equal.h" "$(@D)/cuda/include/thrust/system/omp/detail/equal.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/detail/execution_policy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/omp/detail/extrema.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/find.h" "$(@D)/cuda/include/thrust/system/omp/detail/find.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/gather.h" "$(@D)/cuda/include/thrust/system/omp/detail/gather.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/generate.h" "$(@D)/cuda/include/thrust/system/omp/detail/generate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/get_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/inner_product.h" 
"$(@D)/cuda/include/thrust/system/omp/detail/inner_product.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/omp/detail/iter_swap.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/logical.h" "$(@D)/cuda/include/thrust/system/omp/detail/logical.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/omp/detail/malloc_and_free.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/omp/detail/memory.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/merge.h" "$(@D)/cuda/include/thrust/system/omp/detail/merge.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/omp/detail/mismatch.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/par.h" "$(@D)/cuda/include/thrust/system/omp/detail/par.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/partition.h" "$(@D)/cuda/include/thrust/system/omp/detail/partition.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/partition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/partition.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/omp/detail/reduce_intervals.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/remove.h" "$(@D)/cuda/include/thrust/system/omp/detail/remove.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/remove.inl" "$(@D)/cuda/include/thrust/system/omp/detail/remove.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/replace.h" "$(@D)/cuda/include/thrust/system/omp/detail/replace.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/omp/detail/reverse.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/omp/detail/scatter.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/omp/detail/sequence.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/omp/detail/set_operations.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/sort.h" "$(@D)/cuda/include/thrust/system/omp/detail/sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/sort.inl" "$(@D)/cuda/include/thrust/system/omp/detail/sort.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/omp/detail/swap_ranges.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/omp/detail/tabulate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/omp/detail/temporary_buffer.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/omp/detail/transform.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/unique.h" "$(@D)/cuda/include/thrust/system/omp/detail/unique.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/unique.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/omp/detail/vector.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/execution_policy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/memory.h" "$(@D)/cuda/include/thrust/system/omp/memory.h" && cp "/usr/local/cuda-10.0/include/thrust/system/omp/vector.h" "$(@D)/cuda/include/thrust/system/omp/vector.h" && cp "/usr/local/cuda-10.0/include/thrust/system/system_error.h" "$(@D)/cuda/include/thrust/system/system_error.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/adjacent_difference.h" 
"$(@D)/cuda/include/thrust/system/tbb/detail/adjacent_difference.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/assign_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/tbb/detail/binary_search.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/copy.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/count.h" "$(@D)/cuda/include/thrust/system/tbb/detail/count.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/equal.h" "$(@D)/cuda/include/thrust/system/tbb/detail/equal.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/execution_policy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/extrema.h" "$(@D)/cuda/include/thrust/system/tbb/detail/extrema.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/find.h" "$(@D)/cuda/include/thrust/system/tbb/detail/find.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/for_each.h" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/gather.h" 
"$(@D)/cuda/include/thrust/system/tbb/detail/gather.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/generate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/generate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/get_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/get_value.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/tbb/detail/inner_product.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/tbb/detail/iter_swap.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/logical.h" "$(@D)/cuda/include/thrust/system/tbb/detail/logical.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/tbb/detail/malloc_and_free.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/memory.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/memory.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/merge.h" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/merge.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/tbb/detail/mismatch.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/par.h" "$(@D)/cuda/include/thrust/system/tbb/detail/par.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/partition.h" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/partition.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/reduce.inl" 
"$(@D)/cuda/include/thrust/system/tbb/detail/reduce.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_intervals.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/remove.h" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/remove.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/replace.h" "$(@D)/cuda/include/thrust/system/tbb/detail/replace.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/reverse.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reverse.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/scan.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/scatter.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scatter.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/sequence.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sequence.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/tbb/detail/set_operations.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/sort.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/sort.inl" 
"$(@D)/cuda/include/thrust/system/tbb/detail/sort.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/tbb/detail/swap_ranges.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/tabulate.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/tbb/detail/temporary_buffer.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/transform.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_scan.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_fill.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/unique.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/unique.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/detail/vector.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/vector.inl" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/execution_policy.h" && cp 
"/usr/local/cuda-10.0/include/thrust/system/tbb/memory.h" "$(@D)/cuda/include/thrust/system/tbb/memory.h" && cp "/usr/local/cuda-10.0/include/thrust/system/tbb/vector.h" "$(@D)/cuda/include/thrust/system/tbb/vector.h" && cp "/usr/local/cuda-10.0/include/thrust/system_error.h" "$(@D)/cuda/include/thrust/system_error.h" && cp "/usr/local/cuda-10.0/include/thrust/tabulate.h" "$(@D)/cuda/include/thrust/tabulate.h" && cp "/usr/local/cuda-10.0/include/thrust/transform.h" "$(@D)/cuda/include/thrust/transform.h" && cp "/usr/local/cuda-10.0/include/thrust/transform_reduce.h" "$(@D)/cuda/include/thrust/transform_reduce.h" && cp "/usr/local/cuda-10.0/include/thrust/transform_scan.h" "$(@D)/cuda/include/thrust/transform_scan.h" && cp "/usr/local/cuda-10.0/include/thrust/tuple.h" "$(@D)/cuda/include/thrust/tuple.h" && cp "/usr/local/cuda-10.0/include/thrust/uninitialized_copy.h" "$(@D)/cuda/include/thrust/uninitialized_copy.h" && cp "/usr/local/cuda-10.0/include/thrust/uninitialized_fill.h" "$(@D)/cuda/include/thrust/uninitialized_fill.h" && cp "/usr/local/cuda-10.0/include/thrust/unique.h" "$(@D)/cuda/include/thrust/unique.h" && cp "/usr/local/cuda-10.0/include/thrust/version.h" "$(@D)/cuda/include/thrust/version.h" && cp "/usr/local/cuda-10.0/include/vector_functions.h" "$(@D)/cuda/include/vector_functions.h" && cp "/usr/local/cuda-10.0/include/vector_functions.hpp" "$(@D)/cuda/include/vector_functions.hpp" && cp "/usr/local/cuda-10.0/include/vector_types.h" "$(@D)/cuda/include/vector_types.h" """, ) @@ -1203,7 +1203,7 @@ genrule( "cuda/nvvm/libdevice/libdevice.10.bc", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/nvvm/libdevice/libdevice.10.bc" "$(@D)//libdevice.10.bc" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then 
rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-10.0/nvvm/libdevice/libdevice.10.bc" "$(@D)//libdevice.10.bc" """, ) @@ -1240,7 +1240,7 @@ genrule( "cuda/extras/CUPTI/include/openacc/cupti_openacc.h", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/gl.h" "$(@D)/cuda/extras/CUPTI/include/GL/gl.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glew.h" "$(@D)/cuda/extras/CUPTI/include/GL/glew.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glext.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glu.h" "$(@D)/cuda/extras/CUPTI/include/GL/glu.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glut.h" "$(@D)/cuda/extras/CUPTI/include/GL/glut.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glx.h" "$(@D)/cuda/extras/CUPTI/include/GL/glx.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glxext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glxext.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/wglew.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglew.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/wglext.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglext.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cuda_stdint.h" "$(@D)/cuda/extras/CUPTI/include/cuda_stdint.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti.h" "$(@D)/cuda/extras/CUPTI/include/cupti.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_activity.h" "$(@D)/cuda/extras/CUPTI/include/cupti_activity.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_callbacks.h" "$(@D)/cuda/extras/CUPTI/include/cupti_callbacks.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_driver_cbid.h" 
"$(@D)/cuda/extras/CUPTI/include/cupti_driver_cbid.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_events.h" "$(@D)/cuda/extras/CUPTI/include/cupti_events.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_metrics.h" "$(@D)/cuda/extras/CUPTI/include/cupti_metrics.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_nvtx_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_nvtx_cbid.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_result.h" "$(@D)/cuda/extras/CUPTI/include/cupti_result.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_runtime_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_runtime_cbid.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_version.h" "$(@D)/cuda/extras/CUPTI/include/cupti_version.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cudaGL_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cudaGL_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cudaVDPAU_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cudaVDPAU_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_nvtx_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_nvtx_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/openacc/cupti_openacc.h" "$(@D)/cuda/extras/CUPTI/include/openacc/cupti_openacc.h" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d 
"$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-10.0/extras/CUPTI/include/GL/gl.h" "$(@D)/cuda/extras/CUPTI/include/GL/gl.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/GL/glew.h" "$(@D)/cuda/extras/CUPTI/include/GL/glew.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/GL/glext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glext.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/GL/glu.h" "$(@D)/cuda/extras/CUPTI/include/GL/glu.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/GL/glut.h" "$(@D)/cuda/extras/CUPTI/include/GL/glut.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/GL/glx.h" "$(@D)/cuda/extras/CUPTI/include/GL/glx.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/GL/glxext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glxext.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/GL/wglew.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglew.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/GL/wglext.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglext.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/cuda_stdint.h" "$(@D)/cuda/extras/CUPTI/include/cuda_stdint.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/cupti.h" "$(@D)/cuda/extras/CUPTI/include/cupti.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/cupti_activity.h" "$(@D)/cuda/extras/CUPTI/include/cupti_activity.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/cupti_callbacks.h" "$(@D)/cuda/extras/CUPTI/include/cupti_callbacks.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/cupti_driver_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_driver_cbid.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/cupti_events.h" "$(@D)/cuda/extras/CUPTI/include/cupti_events.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/cupti_metrics.h" "$(@D)/cuda/extras/CUPTI/include/cupti_metrics.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/cupti_nvtx_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_nvtx_cbid.h" && cp 
"/usr/local/cuda-10.0/extras/CUPTI/include/cupti_result.h" "$(@D)/cuda/extras/CUPTI/include/cupti_result.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/cupti_runtime_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_runtime_cbid.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/cupti_version.h" "$(@D)/cuda/extras/CUPTI/include/cupti_version.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/generated_cudaGL_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cudaGL_meta.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/generated_cudaVDPAU_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cudaVDPAU_meta.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/generated_cuda_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_meta.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/generated_nvtx_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_nvtx_meta.h" && cp "/usr/local/cuda-10.0/extras/CUPTI/include/openacc/cupti_openacc.h" "$(@D)/cuda/extras/CUPTI/include/openacc/cupti_openacc.h" """, ) @@ -1248,17 +1248,17 @@ genrule( name = "cuda-lib", outs = [ "cuda/lib/libcuda.so", - "cuda/lib/libcudart.so.9.0", + "cuda/lib/libcudart.so.10.0", "cuda/lib/libcudart_static.a", - "cuda/lib/libcublas.so.9.0", - "cuda/lib/libcusolver.so.9.0", - "cuda/lib/libcurand.so.9.0", - "cuda/lib/libcufft.so.9.0", + "cuda/lib/libcublas.so.10.0", + "cuda/lib/libcusolver.so.10.0", + "cuda/lib/libcurand.so.10.0", + "cuda/lib/libcufft.so.10.0", "cuda/lib/libcudnn.so.7", - "cuda/lib/libcupti.so.9.0", + "cuda/lib/libcupti.so.10.0", ], cmd 
= """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart.so.9.0.176" "$(@D)/cuda/lib/libcudart.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart_static.a" "$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcublas.so.9.0.480" "$(@D)/cuda/lib/libcublas.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcusolver.so.9.0.176" "$(@D)/cuda/lib/libcusolver.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcurand.so.9.0.176" "$(@D)/cuda/lib/libcurand.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcufft.so.9.0.176" "$(@D)/cuda/lib/libcufft.so.9.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.7.2.1" "$(@D)/cuda/lib/libcudnn.so.7" && cp "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.9.0.176" "$(@D)/cuda/lib/libcupti.so.9.0" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-10.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcudart.so.10.0.176" "$(@D)/cuda/lib/libcudart.so.10.0" && cp "/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcudart_static.a" "$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcublas.so.10.0.480" "$(@D)/cuda/lib/libcublas.so.10.0" && cp "/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcusolver.so.10.0.176" "$(@D)/cuda/lib/libcusolver.so.10.0" && cp 
"/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcurand.so.10.0.176" "$(@D)/cuda/lib/libcurand.so.10.0" && cp "/usr/local/cuda-10.0/targets/x86_64-linux/lib/libcufft.so.10.0.176" "$(@D)/cuda/lib/libcufft.so.10.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.7.2.1" "$(@D)/cuda/lib/libcudnn.so.7" && cp "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.10.0.176" "$(@D)/cuda/lib/libcupti.so.10.0" """, ) diff --git a/third_party/toolchains/gpus/cuda/cuda/cuda_config.h b/third_party/toolchains/gpus/cuda/cuda/cuda_config.h index 7cdaf144ad..b05bfb7326 100644 --- a/third_party/toolchains/gpus/cuda/cuda/cuda_config.h +++ b/third_party/toolchains/gpus/cuda/cuda/cuda_config.h @@ -19,9 +19,9 @@ limitations under the License. #define TF_CUDA_CAPABILITIES CudaVersion("3.0") -#define TF_CUDA_VERSION "9.0" +#define TF_CUDA_VERSION "10.0" #define TF_CUDNN_VERSION "7" -#define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-9.0" +#define TF_CUDA_TOOLKIT_PATH "/usr/local/cuda-10.0" #endif // CUDA_CUDA_CONFIG_H_ diff --git a/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/clang/bin/crosstool_wrapper_driver_is_not_gcc b/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/clang/bin/crosstool_wrapper_driver_is_not_gcc index 63893d3722..192314137d 100755 --- a/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/clang/bin/crosstool_wrapper_driver_is_not_gcc +++ b/third_party/toolchains/preconfig/ubuntu14.04/gcc-nvcc/clang/bin/crosstool_wrapper_driver_is_not_gcc @@ -49,9 +49,9 @@ import pipes CPU_COMPILER = ('/usr/bin/gcc') GCC_HOST_COMPILER_PATH = ('/usr/bin/gcc') -NVCC_PATH = '/usr/local/cuda-9.0/bin/nvcc' +NVCC_PATH = '/usr/local/cuda/bin/nvcc' PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) -NVCC_VERSION = '9.0' +NVCC_VERSION = '10.0' def Log(s): print('gpus/crosstool: {0}'.format(s)) -- GitLab From e0aa9387258df040e296f18c6b6fdb560bce6fcc Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 29 Dec 2018 01:02:34 -0800 Subject: [PATCH 0078/1095] compat: Update forward compatibility horizon to 2018-12-29 PiperOrigin-RevId: 227206663 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index df10dc1b79..3a9b3b3838 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 28) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 29) @tf_export("compat.forward_compatible") -- GitLab From d2dd369f9dadb5dd3220ababa299ad89cd8e8574 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 29 Dec 2018 08:46:45 -0800 Subject: [PATCH 0079/1095] Split input-related classes out of python/distribute/values.py into new file .../input_lib.py. 
PiperOrigin-RevId: 227227637 --- tensorflow/contrib/distribute/python/BUILD | 31 +- .../python/collective_all_reduce_strategy.py | 11 +- .../distribute/python/input_lib_test.py | 480 ++++++++++++ .../distribute/python/mirrored_strategy.py | 8 +- .../distribute/python/one_device_strategy.py | 12 +- .../python/parameter_server_strategy.py | 13 +- .../contrib/distribute/python/tpu_strategy.py | 12 +- .../contrib/distribute/python/values_test.py | 446 ----------- tensorflow/python/distribute/BUILD | 20 +- tensorflow/python/distribute/input_lib.py | 707 ++++++++++++++++++ .../python/distribute/mirrored_strategy.py | 16 +- tensorflow/python/distribute/values.py | 678 ----------------- 12 files changed, 1272 insertions(+), 1162 deletions(-) create mode 100644 tensorflow/contrib/distribute/python/input_lib_test.py create mode 100644 tensorflow/python/distribute/input_lib.py diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index 2d6a08df9a..f27224e46e 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -23,17 +23,14 @@ cuda_py_test( additional_deps = [ ":combinations", ":mirrored_strategy", - ":multi_worker_test_base", "@absl_py//absl/testing:parameterized", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:constant_op", - "//tensorflow/python:errors", "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", "//tensorflow/python:training", "//tensorflow/python:variable_scope", - "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/distribute:device_util", "//tensorflow/python/distribute:values", "//tensorflow/python/eager:context", @@ -45,14 +42,36 @@ cuda_py_test( ], ) +cuda_py_test( + name = "input_lib_test", + srcs = ["input_lib_test.py"], + additional_deps = [ + ":combinations", + ":mirrored_strategy", + ":multi_worker_test_base", + "@absl_py//absl/testing:parameterized", + 
"//tensorflow/core:protos_all_py", + "//tensorflow/python:errors", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/distribute:input_lib", + "//tensorflow/python/distribute:values", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + ], + tags = [ + "no_pip", + ], +) + py_library( name = "mirrored_strategy", srcs = ["mirrored_strategy.py"], visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/python/distribute:distribute_lib", + "//tensorflow/python/distribute:input_lib", "//tensorflow/python/distribute:mirrored_strategy", - "//tensorflow/python/distribute:values", ], ) @@ -69,6 +88,7 @@ py_library( "//tensorflow/python:training", "//tensorflow/python:util", "//tensorflow/python/distribute:cross_device_ops", + "//tensorflow/python/distribute:input_lib", "//tensorflow/python/distribute:multi_worker_util", "//tensorflow/python/distribute:reduce_util", "//tensorflow/python/distribute:values", @@ -119,6 +139,7 @@ py_library( "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", "//tensorflow/python/distribute:distribute_lib", + "//tensorflow/python/distribute:input_lib", "//tensorflow/python/distribute:reduce_util", "//tensorflow/python/distribute:values", "//tensorflow/python/eager:context", @@ -139,6 +160,7 @@ py_library( "//tensorflow/python:training", "//tensorflow/python/distribute:cross_device_ops", "//tensorflow/python/distribute:cross_device_utils", + "//tensorflow/python/distribute:input_lib", "//tensorflow/python/distribute:multi_worker_util", "//tensorflow/python/distribute:values", "//tensorflow/python/eager:context", @@ -289,6 +311,7 @@ py_library( "//tensorflow/python:framework_ops", "//tensorflow/python:tensor_util", "//tensorflow/python:util", + "//tensorflow/python/distribute:input_lib", "//tensorflow/python/distribute:reduce_util", "//tensorflow/python/distribute:values", ], diff --git 
a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py index 12197c3d0d..f6361cb6e8 100644 --- a/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py +++ b/tensorflow/contrib/distribute/python/collective_all_reduce_strategy.py @@ -26,6 +26,7 @@ from tensorflow.python.distribute import cross_device_ops as cross_device_ops_li from tensorflow.python.distribute import cross_device_utils from tensorflow.python.distribute import device_util from tensorflow.python.distribute import distribute_lib +from tensorflow.python.distribute import input_lib from tensorflow.python.distribute import multi_worker_util from tensorflow.python.distribute import values from tensorflow.python.eager import context @@ -130,7 +131,7 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended): self._collective_keys = cross_device_utils.CollectiveKeys() self._initialize_local(local_devices) - self._input_workers = values.InputWorkers( + self._input_workers = input_lib.InputWorkers( self._device_map, [(self._worker_device, self.worker_devices)]) self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce( num_workers=self._num_workers, @@ -229,13 +230,13 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended): """Distributes the dataset to each local GPU.""" # TODO(yuefengz): shard the dataset. 
worker_index = 0 - return values.PerReplicaDataset( + return input_lib.PerReplicaDataset( self._call_dataset_fn(dataset_fn), self._input_workers, worker_index, prefetch_on_device=True) def _make_dataset_iterator(self, dataset): - return values.DatasetIterator(dataset, self._input_workers, - self._num_replicas_in_sync) + return input_lib.DatasetIterator(dataset, self._input_workers, + self._num_replicas_in_sync) def _make_input_fn_iterator( self, @@ -252,7 +253,7 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended): input_pipeline_id=input_pipeline_id, num_replicas_in_sync=self._num_replicas_in_sync) - return values.InputFunctionIterator( + return input_lib.InputFunctionIterator( input_fn, self._input_workers, [input_context]) def _configure(self, diff --git a/tensorflow/contrib/distribute/python/input_lib_test.py b/tensorflow/contrib/distribute/python/input_lib_test.py new file mode 100644 index 0000000000..f589cd6ad5 --- /dev/null +++ b/tensorflow/contrib/distribute/python/input_lib_test.py @@ -0,0 +1,480 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the input_lib library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.contrib.distribute.python import multi_worker_test_base +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.data.experimental.ops import batching +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.distribute import distribute_lib +from tensorflow.python.distribute import input_lib +from tensorflow.python.distribute import values +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import errors +from tensorflow.python.framework import test_util +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.util import nest + + +class PerReplicaDatasetTest(test.TestCase): + + config = config_pb2.ConfigProto() + config.allow_soft_placement = True + + def _test_iterator(self, devices, dataset, expected_values): + device_map = values.ReplicaDeviceMap(devices) + input_workers = input_lib.InputWorkers(device_map) + per_replica_dataset = input_lib.PerReplicaDataset(dataset, input_workers, 0) + if context.executing_eagerly(): + iterator = per_replica_dataset.make_one_shot_iterator() + else: + iterator = per_replica_dataset.make_initializable_iterator() + self.evaluate([iterator.initializer]) + + for expected_value in expected_values: + next_element = iterator.get_next_as_list() + computed_value = self.evaluate(next_element) + self.assertEqual(expected_value, computed_value) + + with self.assertRaises(errors.OutOfRangeError): + next_element = iterator.get_next_as_list() + self.evaluate(next_element) + + @test_util.run_in_graph_and_eager_modes + def 
testOneDevice(self): + devices = ["/device:CPU:0"] + dataset = dataset_ops.Dataset.range(10) + + expected_values = [[i] for i in range(10)] + + self._test_iterator(devices, dataset, expected_values) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testMultipleDevices(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + devices = ["/device:CPU:0", "/device:GPU:0"] + dataset = dataset_ops.Dataset.range(10) + + expected_values = [[i, i+1] for i in range(0, 10, 2)] + + self._test_iterator(devices, dataset, expected_values) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testTupleDataset(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + devices = ["/device:CPU:0", "/device:GPU:0"] + dataset1 = dataset_ops.Dataset.range(10) + dataset2 = dataset_ops.Dataset.range(10).map(lambda x: x**2) + dataset = dataset_ops.Dataset.zip((dataset1, dataset2)) + + expected_values = [[(i, i**2), (i+1, (i+1)**2)] for i in range(0, 10, 2)] + + self._test_iterator(devices, dataset, expected_values) + + @test_util.run_in_graph_and_eager_modes(config=config) + def testUnevenDatasetBatches(self): + if context.num_gpus() < 1 and context.executing_eagerly(): + self.skipTest("A GPU is not available for this test in eager mode.") + + devices = ["/device:CPU:0", "/device:GPU:0"] + dataset = dataset_ops.Dataset.range(11) + + expected_values = [[i, i+1] for i in range(0, 10, 2)] + self._test_iterator(devices, dataset, expected_values) + + def testInitializableIterator(self): + with context.graph_mode(): + devices = ["/device:CPU:0"] + # Using random input since that is only allowed with initializable + # iterator. 
+ dataset = dataset_ops.Dataset.from_tensor_slices( + random_ops.random_uniform((10,))) + + device_map = values.ReplicaDeviceMap(devices) + input_workers = input_lib.InputWorkers(device_map) + per_replica_dataset = input_lib.PerReplicaDataset( + dataset, input_workers, 0) + iterator = per_replica_dataset.make_initializable_iterator() + + self.evaluate(iterator.initializer) + next_element = iterator.get_next_as_list() + for _ in range(10): + self.evaluate(next_element) + + # Should fail after the input is finished. + with self.assertRaises(errors.OutOfRangeError): + self.evaluate(next_element) + + # After re-initializing the iterator, should be able to iterate again. + self.evaluate(iterator.initializer) + for _ in range(10): + self.evaluate(next_element) + + +class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase): + + def _test_iterator(self, sess, iterator, devices, expected_values): + next_element = iterator.get_next() + for r, device in enumerate(devices): + v = values.select_replica(r, next_element) + # The `v` here can be a tuple. 
+ for element in nest.flatten(v): + self.assertTrue(element.device in device) + + for expected_value in expected_values: + t = [values.select_replica(r, next_element) for r in range(len(devices))] + actual = sess.run(t) + self.assertEqual(expected_value, actual) + + with self.assertRaises(errors.OutOfRangeError): + sess.run([values.select_replica(r, next_element) + for r in range(len(devices))]) + + def _test_dataset(self, dataset_fn, worker_devices, devices, + expected_values): + device_map = values.ReplicaDeviceMap(devices) + input_workers = input_lib.InputWorkers(device_map, worker_devices) + multi_worker_dataset = input_lib.MultiWorkerDataset( + dataset_fn, input_workers) + multi_worker_iterator = multi_worker_dataset.make_initializable_iterator() + with self.cached_session() as sess: + sess.run(multi_worker_iterator.initializer) + self._test_iterator(sess, multi_worker_iterator, devices, expected_values) + + def _cpu_devices(self): + worker_devices = ( + ("/job:worker/replica:0/task:0", + ["/job:worker/replica:0/task:0/device:CPU:0"]), + ("/job:worker/replica:0/task:1", + ["/job:worker/replica:0/task:1/device:CPU:0"]) + ) + devices = [ + "/job:worker/replica:0/task:0/device:CPU:0", + "/job:worker/replica:0/task:1/device:CPU:0" + ] + return worker_devices, devices + + def _cpu_and_one_gpu_devices(self): + worker_devices = ( + ("/job:worker/replica:0/task:0", ( + "/job:worker/replica:0/task:0/device:GPU:0", + "/job:worker/replica:0/task:0/device:CPU:0" + )), + ("/job:worker/replica:0/task:1", ( + "/job:worker/replica:0/task:1/device:GPU:0", + "/job:worker/replica:0/task:1/device:CPU:0" + )) + ) + devices = [ + "/job:worker/replica:0/task:0/device:GPU:0", + "/job:worker/replica:0/task:0/device:CPU:0", + "/job:worker/replica:0/task:1/device:GPU:0", + "/job:worker/replica:0/task:1/device:CPU:0" + ] + return worker_devices, devices + + def testDataDistributionOneDevicePerWorker(self): + worker_devices, devices = self._cpu_devices() + with context.graph_mode(): + 
dataset_fn = lambda: dataset_ops.Dataset.range(8) + self._test_dataset( + dataset_fn, worker_devices, devices, + [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]) + + def testDataDistributionTwoDevicePerWorker(self): + if context.num_gpus() < 1: + self.skipTest("A GPU is not available for this test.") + worker_devices, devices = self._cpu_and_one_gpu_devices() + with context.graph_mode(): + dataset_fn = lambda: dataset_ops.Dataset.range(8) + self._test_dataset( + dataset_fn, worker_devices, devices, + [[0, 1, 0, 1], [2, 3, 2, 3], [4, 5, 4, 5], [6, 7, 6, 7]]) + + def testTupleDataset(self): + worker_devices, devices = self._cpu_devices() + + with context.graph_mode(): + + def dataset_fn(): + dataset1 = dataset_ops.Dataset.range(8) + dataset2 = dataset_ops.Dataset.range(8).map(lambda x: x**2) + return dataset_ops.Dataset.zip((dataset1, dataset2)) + + expected_values = [[(i, i**2), (i, i**2)] for i in range(8)] + self._test_dataset(dataset_fn, worker_devices, devices, + expected_values) + + def testInitializableIterator(self): + worker_devices, devices = self._cpu_devices() + with context.graph_mode(), self.cached_session() as sess: + dataset_fn = lambda: dataset_ops.Dataset.range(8) + device_map = values.ReplicaDeviceMap(devices) + input_workers = input_lib.InputWorkers(device_map, worker_devices) + multi_worker_dataset = input_lib.MultiWorkerDataset( + dataset_fn, input_workers) + multi_worker_iterator = multi_worker_dataset.make_initializable_iterator() + + sess.run(multi_worker_iterator.initializer) + self._test_iterator( + sess, multi_worker_iterator, devices, + [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]) + + # After re-initializing the iterator, should be able to iterate again. + sess.run(multi_worker_iterator.initializer) + self._test_iterator( + sess, multi_worker_iterator, devices, + [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]) + + def testValueErrorForIterator(self): + # Incompatiable arguments. 
+ d1 = "/device:GPU:0" + d2 = "/device:GPU:1" + device_map = values.ReplicaDeviceMap([d1, d2]) + input_workers = input_lib.InputWorkers( + device_map, (("w1", (d1,)), ("w2", (d2,)))) + with self.assertRaises(ValueError): + input_lib.MultiWorkerDataIterator([("w1", None)], input_workers) + + def testDuplicateDevices(self): + _, devices = self._cpu_devices() + devices.append("/job:worker/replica:0/task:0/device:CPU:0") + with self.assertRaises(ValueError): + _ = values.ReplicaDeviceMap(devices) + + +class InputIteratorTestBase(test.TestCase): + + def _test_iterator(self, input_type, dataset_fn, worker_device_pairs, + expected_values, sess=None, split_batch_by=None): + devices = nest.flatten([ds for _, ds in worker_device_pairs]) + device_map = values.ReplicaDeviceMap(devices) + input_workers = input_lib.InputWorkers(device_map, worker_device_pairs) + + if input_type == "input_fn": + input_contexts = [ + distribute_lib.InputContext() for _ in worker_device_pairs] + input_fn = lambda _: dataset_fn() + iterator = input_lib.InputFunctionIterator( + input_fn, input_workers, input_contexts) + else: + iterator = input_lib.DatasetIterator( + dataset_fn(), input_workers, split_batch_by) + + evaluate = lambda x: sess.run(x) if sess else self.evaluate(x) + + evaluate(control_flow_ops.group(iterator.initialize())) + + for expected_value in expected_values: + next_element = iterator.get_next() + computed_value = evaluate( + [values.select_replica(r, next_element) for r in range(len(devices))]) + self.assertAllEqual(expected_value, computed_value) + + with self.assertRaises(errors.OutOfRangeError): + next_element = iterator.get_next() + evaluate([values.select_replica(r, next_element) + for r in range(len(devices))]) + + # After re-initializing the iterator, should be able to iterate again. 
+ evaluate(control_flow_ops.group(iterator.initialize())) + + for expected_value in expected_values: + next_element = iterator.get_next() + computed_value = evaluate( + [values.select_replica(r, next_element) for r in range(len(devices))]) + self.assertAllEqual(expected_value, computed_value) + + +class InputIteratorSingleWorkerTest(InputIteratorTestBase, + parameterized.TestCase): + + @combinations.generate(combinations.combine( + mode=["graph", "eager"], + input_type=["input_fn", "dataset"])) + def testOneDeviceCPU(self, input_type): + worker_device_pairs = [("", ["/device:CPU:0"])] + dataset_fn = lambda: dataset_ops.Dataset.range(10) + + expected_values = [[i] for i in range(10)] + + self._test_iterator(input_type, dataset_fn, worker_device_pairs, + expected_values) + + @combinations.generate(combinations.combine( + mode=["graph", "eager"], + input_type=["input_fn", "dataset"], + required_gpus=1)) + def testTwoDevicesOneGPUOneCPU(self, input_type): + worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])] + dataset_fn = lambda: dataset_ops.Dataset.range(10) + + expected_values = [[i, i+1] for i in range(0, 10, 2)] + + self._test_iterator(input_type, dataset_fn, worker_device_pairs, + expected_values) + + @combinations.generate(combinations.combine( + mode=["graph", "eager"], + input_type=["input_fn", "dataset"], + required_gpus=1)) + def testTupleDataset(self, input_type): + worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])] + def dataset_fn(): + dataset1 = dataset_ops.Dataset.range(10) + dataset2 = dataset_ops.Dataset.range(10).map(lambda x: x**2) + return dataset_ops.Dataset.zip((dataset1, dataset2)) + + expected_values = [[(i, i**2), (i+1, (i+1)**2)] for i in range(0, 10, 2)] + + self._test_iterator(input_type, dataset_fn, worker_device_pairs, + expected_values) + + @combinations.generate(combinations.combine( + mode=["graph", "eager"], + input_type=["input_fn", "dataset"], + required_gpus=1)) + def testUnevenDatasetBatches(self, 
input_type): + worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])] + dataset_fn = lambda: dataset_ops.Dataset.range(11) + + expected_values = [[i, i+1] for i in range(0, 10, 2)] + self._test_iterator(input_type, dataset_fn, worker_device_pairs, + expected_values) + + @combinations.generate(combinations.combine( + mode=["graph", "eager"], + input_type=["dataset"], + split_batch_by=[None, 2], + required_gpus=1)) + def testBatchSplitting(self, input_type, split_batch_by): + worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])] + batch_size = 10 + dataset_fn = lambda: dataset_ops.Dataset.range(100).batch(batch_size) + + updated_batch_size = ( + batch_size // split_batch_by if split_batch_by else batch_size) + expected_values = [[range(i, i+updated_batch_size), + range(i+updated_batch_size, i+2*updated_batch_size)] + for i in range(0, 100, updated_batch_size*2)] + + self._test_iterator(input_type, dataset_fn, worker_device_pairs, + expected_values, sess=None, + split_batch_by=split_batch_by) + + +class InputIteratorMultiWorkerTest( + multi_worker_test_base.MultiWorkerTestBase, InputIteratorTestBase, + parameterized.TestCase): + + def _cpu_devices(self): + return [ + ("/job:worker/replica:0/task:0", + ["/job:worker/replica:0/task:0/device:CPU:0"]), + ("/job:worker/replica:0/task:1", + ["/job:worker/replica:0/task:1/device:CPU:0"])] + + def _cpu_and_one_gpu_devices(self): + return [ + ("/job:worker/replica:0/task:0", [ + "/job:worker/replica:0/task:0/device:GPU:0", + "/job:worker/replica:0/task:0/device:CPU:0" + ]), + ("/job:worker/replica:0/task:1", [ + "/job:worker/replica:0/task:1/device:GPU:0", + "/job:worker/replica:0/task:1/device:CPU:0" + ]) + ] + + @combinations.generate(combinations.combine( + mode=["graph"], + input_type=["input_fn", "dataset"])) + def testOneDevicePerWorker(self, input_type): + worker_devices = self._cpu_devices() + with context.graph_mode(), self.cached_session() as sess: + dataset_fn = lambda: 
dataset_ops.Dataset.range(4) + self._test_iterator(input_type, dataset_fn, worker_devices, + [[0, 0], [1, 1], [2, 2], [3, 3]], sess) + + @combinations.generate(combinations.combine( + mode=["graph"], + input_type=["input_fn", "dataset"], + required_gpus=1)) + def testTwoDevicesPerWorker(self, input_type): + worker_devices = self._cpu_and_one_gpu_devices() + with context.graph_mode(), self.cached_session() as sess: + dataset_fn = lambda: dataset_ops.Dataset.range(4) + self._test_iterator(input_type, dataset_fn, worker_devices, + [[0, 1, 0, 1], [2, 3, 2, 3]], sess) + + @combinations.generate(combinations.combine( + mode=["graph"], + input_type=["input_fn", "dataset"])) + def testTupleDataset(self, input_type): + worker_devices = self._cpu_devices() + with context.graph_mode(), self.cached_session() as sess: + def dataset_fn(): + dataset1 = dataset_ops.Dataset.range(4) + dataset2 = dataset_ops.Dataset.range(4).map(lambda x: x**2) + return dataset_ops.Dataset.zip((dataset1, dataset2)) + + expected_values = [[(i, i**2), (i, i**2)] for i in range(0, 4)] + self._test_iterator(input_type, dataset_fn, worker_devices, + expected_values, sess) + + +class SplitDatasetBatchTest(test.TestCase): + + def testBatchDataset(self): + dataset = dataset_ops.Dataset.range(100).batch(20) + split_batch_by = 2 + result_dataset = input_lib._split_dataset_batch(dataset, split_batch_by) + expected_values = [range(i, i+10) for i in range(0, 100, 10)] + result = [self.evaluate(el) for el in result_dataset] + self.assertAllEqual(expected_values, result) + + def testMapAndBatchDataset(self): + dataset = dataset_ops.Dataset.range(100) + dataset = dataset.apply(batching.map_and_batch(lambda x: x, 20)) + split_batch_by = 2 + result_dataset = input_lib._split_dataset_batch(dataset, split_batch_by) + expected_values = [range(i, i+10) for i in range(0, 100, 10)] + result = [self.evaluate(el) for el in result_dataset] + self.assertAllEqual(expected_values, result) + + def testPrefetchDataset(self): + 
dataset = dataset_ops.Dataset.range(100).batch(20).prefetch(1) + split_batch_by = 2 + result_dataset = input_lib._split_dataset_batch(dataset, split_batch_by) + expected_values = [range(i, i+10) for i in range(0, 100, 10)] + result = [self.evaluate(el) for el in result_dataset] + self.assertAllEqual(expected_values, result) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index 71e50b83b0..db8fd98307 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -21,8 +21,8 @@ from __future__ import print_function import functools from tensorflow.python.distribute import distribute_lib +from tensorflow.python.distribute import input_lib from tensorflow.python.distribute import mirrored_strategy -from tensorflow.python.distribute import values # pylint: disable=protected-access,invalid-name @@ -135,14 +135,14 @@ class MirroredExtended(CoreMirroredExtended): Returns: An `InputIterator` which returns inputs for each step of the computation. 
""" - return values.DatasetIterator(dataset, self._input_workers) + return input_lib.DatasetIterator(dataset, self._input_workers) def _distribute_dataset(self, dataset_fn): if self._local_mode: - return values.PerReplicaDataset( + return input_lib.PerReplicaDataset( self._call_dataset_fn(dataset_fn), self._input_workers, 0) else: - return values.MultiWorkerDataset( + return input_lib.MultiWorkerDataset( functools.partial(self._call_dataset_fn, dataset_fn), self._input_workers, auto_shard=self._auto_shard_dataset) diff --git a/tensorflow/contrib/distribute/python/one_device_strategy.py b/tensorflow/contrib/distribute/python/one_device_strategy.py index 700751d68c..fb470f8546 100644 --- a/tensorflow/contrib/distribute/python/one_device_strategy.py +++ b/tensorflow/contrib/distribute/python/one_device_strategy.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.python.distribute import device_util from tensorflow.python.distribute import distribute_lib +from tensorflow.python.distribute import input_lib from tensorflow.python.distribute import values from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -52,7 +53,8 @@ class OneDeviceExtended(distribute_lib.DistributionStrategyExtended): worker = device_util.canonicalize("/device:CPU:0") worker_device_pairs = [(worker, [self._device])] device_map = values.SingleDeviceMap(device) - self._input_workers = values.InputWorkers(device_map, worker_device_pairs) + self._input_workers = input_lib.InputWorkers( + device_map, worker_device_pairs) def _create_variable(self, next_creator, *args, **kwargs): colocate_with = kwargs.pop("colocate_with", None) @@ -67,17 +69,17 @@ class OneDeviceExtended(distribute_lib.DistributionStrategyExtended): def _make_dataset_iterator(self, dataset): """Make iterator from dataset without splitting the batch.""" - return values.DatasetIterator(dataset, self._input_workers) + return input_lib.DatasetIterator(dataset, 
self._input_workers) def _distribute_dataset(self, dataset_fn): - return values.PerReplicaDataset( + return input_lib.PerReplicaDataset( self._call_dataset_fn(dataset_fn), self._input_workers, 0) def _make_input_fn_iterator( self, input_fn, replication_mode=distribute_lib.InputReplicationMode.PER_WORKER): - return values.InputFunctionIterator( + return input_lib.InputFunctionIterator( input_fn, self._input_workers, [distribute_lib.InputContext()]) def _broadcast_to(self, tensor, destinations): @@ -91,7 +93,7 @@ class OneDeviceExtended(distribute_lib.DistributionStrategyExtended): initial_loop_values = {} initial_loop_values = nest.flatten(initial_loop_values) - ctx = values.MultiStepContext() + ctx = input_lib.MultiStepContext() def body(i, *args): """A wrapper around `fn` to create the while loop body.""" del args diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy.py b/tensorflow/contrib/distribute/python/parameter_server_strategy.py index a6e924b509..461e1bca21 100644 --- a/tensorflow/contrib/distribute/python/parameter_server_strategy.py +++ b/tensorflow/contrib/distribute/python/parameter_server_strategy.py @@ -24,6 +24,7 @@ from tensorflow.contrib.distribute.python import mirrored_strategy from tensorflow.python.distribute import cross_device_ops as cross_device_ops_lib from tensorflow.python.distribute import device_util from tensorflow.python.distribute import distribute_lib +from tensorflow.python.distribute import input_lib from tensorflow.python.distribute import multi_worker_util from tensorflow.python.distribute import values from tensorflow.python.eager import context @@ -153,7 +154,7 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended): compute_devices = (worker_device,) self._device_map = values.ReplicaDeviceMap(compute_devices) - self._input_workers = values.InputWorkers( + self._input_workers = input_lib.InputWorkers( self._device_map, [(worker_device, compute_devices)]) # In distributed mode, place 
variables on ps jobs in a round-robin fashion. @@ -210,7 +211,7 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended): compute_devices = (_LOCAL_CPU,) self._device_map = values.ReplicaDeviceMap(compute_devices) - self._input_workers = values.InputWorkers( + self._input_workers = input_lib.InputWorkers( self._device_map, [(worker_device, compute_devices)]) # If there is only one GPU, put everything on that GPU. Otherwise, place @@ -237,13 +238,13 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended): def _distribute_dataset(self, dataset_fn): """Distributes the dataset to each local GPU.""" - return values.PerReplicaDataset( + return input_lib.PerReplicaDataset( self._call_dataset_fn(dataset_fn), self._input_workers, 0, prefetch_on_device=True) def _make_dataset_iterator(self, dataset): - return values.DatasetIterator(dataset, self._input_workers, - self._num_replicas_in_sync) + return input_lib.DatasetIterator(dataset, self._input_workers, + self._num_replicas_in_sync) def _make_input_fn_iterator( self, @@ -262,7 +263,7 @@ class ParameterServerExtended(distribute_lib.DistributionStrategyExtended): num_input_pipelines=num_input_pipelines, input_pipeline_id=input_pipeline_id, num_replicas_in_sync=self._num_replicas_in_sync) - return values.InputFunctionIterator( + return input_lib.InputFunctionIterator( input_fn, self._input_workers, [input_context]) def _broadcast_to(self, tensor, destinations): diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py index 89b48d3f13..10b7ef0407 100644 --- a/tensorflow/contrib/distribute/python/tpu_strategy.py +++ b/tensorflow/contrib/distribute/python/tpu_strategy.py @@ -33,6 +33,7 @@ from tensorflow.python.client import session as session_lib from tensorflow.python.distribute import cross_device_ops as cross_device_ops_lib from tensorflow.python.distribute import device_util from tensorflow.python.distribute import 
distribute_lib +from tensorflow.python.distribute import input_lib from tensorflow.python.distribute import reduce_util from tensorflow.python.distribute import values from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver as resolver_lib @@ -204,7 +205,8 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended): (self.get_host(hid), [self.get_host_cpu_device(hid)]) for hid in range(self.num_hosts) ] - self._input_workers = values.InputWorkers(input_device_map, worker_devices) + self._input_workers = input_lib.InputWorkers( + input_device_map, worker_devices) # TODO(sourabhbajaj): Remove this once performance of running one step # at a time is comparable to multiple steps. @@ -304,11 +306,11 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended): def _make_dataset_iterator(self, dataset): """Make iterators for each of the TPU hosts.""" - return values.DatasetIterator(dataset, self._input_workers, - self._num_replicas_in_sync) + return input_lib.DatasetIterator(dataset, self._input_workers, + self._num_replicas_in_sync) def _distribute_dataset(self, dataset_fn): - return values.MultiWorkerDataset( + return input_lib.MultiWorkerDataset( functools.partial(self._call_dataset_fn, dataset_fn), self._input_workers) @@ -339,7 +341,7 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended): if initial_loop_values is None: initial_loop_values = {} initial_loop_values = nest.flatten(initial_loop_values) - ctx = values.MultiStepContext() + ctx = input_lib.MultiStepContext() def run_fn(*args, **kwargs): """Single step on the TPU device.""" diff --git a/tensorflow/contrib/distribute/python/values_test.py b/tensorflow/contrib/distribute/python/values_test.py index 73efb524b9..51c58b0b2f 100644 --- a/tensorflow/contrib/distribute/python/values_test.py +++ b/tensorflow/contrib/distribute/python/values_test.py @@ -22,28 +22,20 @@ import os from absl.testing import parameterized from tensorflow.contrib.distribute.python import 
combinations -from tensorflow.contrib.distribute.python import multi_worker_test_base from tensorflow.core.protobuf import config_pb2 -from tensorflow.python.data.experimental.ops import batching -from tensorflow.python.data.ops import dataset_ops from tensorflow.python.distribute import device_util -from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import values from tensorflow.python.eager import context from tensorflow.python.eager import test from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.framework import constant_op -from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.training import saver as saver_lib -from tensorflow.python.util import nest class DistributedValuesTest(test.TestCase): @@ -354,444 +346,6 @@ class RegroupAndSelectDeviceTest(test.TestCase): merged_estimator_spec)) -class PerReplicaDatasetTest(test.TestCase): - - config = config_pb2.ConfigProto() - config.allow_soft_placement = True - - def _test_iterator(self, devices, dataset, expected_values): - device_map = values.ReplicaDeviceMap(devices) - input_workers = values.InputWorkers(device_map) - per_replica_dataset = values.PerReplicaDataset(dataset, input_workers, 0) - if context.executing_eagerly(): - iterator = per_replica_dataset.make_one_shot_iterator() - else: - iterator = per_replica_dataset.make_initializable_iterator() - self.evaluate([iterator.initializer]) - - for expected_value in expected_values: - next_element = iterator.get_next_as_list() - computed_value = self.evaluate(next_element) - 
self.assertEqual(expected_value, computed_value) - - with self.assertRaises(errors.OutOfRangeError): - next_element = iterator.get_next_as_list() - self.evaluate(next_element) - - @test_util.run_in_graph_and_eager_modes - def testOneDevice(self): - devices = ["/device:CPU:0"] - dataset = dataset_ops.Dataset.range(10) - - expected_values = [[i] for i in range(10)] - - self._test_iterator(devices, dataset, expected_values) - - @test_util.run_in_graph_and_eager_modes(config=config) - def testMultipleDevices(self): - if context.num_gpus() < 1 and context.executing_eagerly(): - self.skipTest("A GPU is not available for this test in eager mode.") - - devices = ["/device:CPU:0", "/device:GPU:0"] - dataset = dataset_ops.Dataset.range(10) - - expected_values = [[i, i+1] for i in range(0, 10, 2)] - - self._test_iterator(devices, dataset, expected_values) - - @test_util.run_in_graph_and_eager_modes(config=config) - def testTupleDataset(self): - if context.num_gpus() < 1 and context.executing_eagerly(): - self.skipTest("A GPU is not available for this test in eager mode.") - - devices = ["/device:CPU:0", "/device:GPU:0"] - dataset1 = dataset_ops.Dataset.range(10) - dataset2 = dataset_ops.Dataset.range(10).map(lambda x: x**2) - dataset = dataset_ops.Dataset.zip((dataset1, dataset2)) - - expected_values = [[(i, i**2), (i+1, (i+1)**2)] for i in range(0, 10, 2)] - - self._test_iterator(devices, dataset, expected_values) - - @test_util.run_in_graph_and_eager_modes(config=config) - def testUnevenDatasetBatches(self): - if context.num_gpus() < 1 and context.executing_eagerly(): - self.skipTest("A GPU is not available for this test in eager mode.") - - devices = ["/device:CPU:0", "/device:GPU:0"] - dataset = dataset_ops.Dataset.range(11) - - expected_values = [[i, i+1] for i in range(0, 10, 2)] - self._test_iterator(devices, dataset, expected_values) - - def testInitializableIterator(self): - with context.graph_mode(): - devices = ["/device:CPU:0"] - # Using random input since that is 
only allowed with initializable - # iterator. - dataset = dataset_ops.Dataset.from_tensor_slices( - random_ops.random_uniform((10,))) - - device_map = values.ReplicaDeviceMap(devices) - input_workers = values.InputWorkers(device_map) - per_replica_dataset = values.PerReplicaDataset(dataset, input_workers, 0) - iterator = per_replica_dataset.make_initializable_iterator() - - self.evaluate(iterator.initializer) - next_element = iterator.get_next_as_list() - for _ in range(10): - self.evaluate(next_element) - - # Should fail after the input is finished. - with self.assertRaises(errors.OutOfRangeError): - self.evaluate(next_element) - - # After re-initializing the iterator, should be able to iterate again. - self.evaluate(iterator.initializer) - for _ in range(10): - self.evaluate(next_element) - - -class MultiWorkerDatasetTest(multi_worker_test_base.MultiWorkerTestBase): - - def _test_iterator(self, sess, iterator, devices, expected_values): - next_element = iterator.get_next() - for r, device in enumerate(devices): - v = values.select_replica(r, next_element) - # The `v` here can be a tuple. 
- for element in nest.flatten(v): - self.assertTrue(element.device in device) - - for expected_value in expected_values: - t = [values.select_replica(r, next_element) for r in range(len(devices))] - actual = sess.run(t) - self.assertEqual(expected_value, actual) - - with self.assertRaises(errors.OutOfRangeError): - sess.run([values.select_replica(r, next_element) - for r in range(len(devices))]) - - def _test_dataset(self, dataset_fn, worker_devices, devices, - expected_values): - device_map = values.ReplicaDeviceMap(devices) - input_workers = values.InputWorkers(device_map, worker_devices) - multi_worker_dataset = values.MultiWorkerDataset( - dataset_fn, input_workers) - multi_worker_iterator = multi_worker_dataset.make_initializable_iterator() - with self.cached_session() as sess: - sess.run(multi_worker_iterator.initializer) - self._test_iterator(sess, multi_worker_iterator, devices, expected_values) - - def _cpu_devices(self): - worker_devices = ( - ("/job:worker/replica:0/task:0", - ["/job:worker/replica:0/task:0/device:CPU:0"]), - ("/job:worker/replica:0/task:1", - ["/job:worker/replica:0/task:1/device:CPU:0"]) - ) - devices = [ - "/job:worker/replica:0/task:0/device:CPU:0", - "/job:worker/replica:0/task:1/device:CPU:0" - ] - return worker_devices, devices - - def _cpu_and_one_gpu_devices(self): - worker_devices = ( - ("/job:worker/replica:0/task:0", ( - "/job:worker/replica:0/task:0/device:GPU:0", - "/job:worker/replica:0/task:0/device:CPU:0" - )), - ("/job:worker/replica:0/task:1", ( - "/job:worker/replica:0/task:1/device:GPU:0", - "/job:worker/replica:0/task:1/device:CPU:0" - )) - ) - devices = [ - "/job:worker/replica:0/task:0/device:GPU:0", - "/job:worker/replica:0/task:0/device:CPU:0", - "/job:worker/replica:0/task:1/device:GPU:0", - "/job:worker/replica:0/task:1/device:CPU:0" - ] - return worker_devices, devices - - def testDataDistributionOneDevicePerWorker(self): - worker_devices, devices = self._cpu_devices() - with context.graph_mode(): - 
dataset_fn = lambda: dataset_ops.Dataset.range(8) - self._test_dataset( - dataset_fn, worker_devices, devices, - [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]) - - def testDataDistributionTwoDevicePerWorker(self): - if context.num_gpus() < 1: - self.skipTest("A GPU is not available for this test.") - worker_devices, devices = self._cpu_and_one_gpu_devices() - with context.graph_mode(): - dataset_fn = lambda: dataset_ops.Dataset.range(8) - self._test_dataset( - dataset_fn, worker_devices, devices, - [[0, 1, 0, 1], [2, 3, 2, 3], [4, 5, 4, 5], [6, 7, 6, 7]]) - - def testTupleDataset(self): - worker_devices, devices = self._cpu_devices() - - with context.graph_mode(): - - def dataset_fn(): - dataset1 = dataset_ops.Dataset.range(8) - dataset2 = dataset_ops.Dataset.range(8).map(lambda x: x**2) - return dataset_ops.Dataset.zip((dataset1, dataset2)) - - expected_values = [[(i, i**2), (i, i**2)] for i in range(8)] - self._test_dataset(dataset_fn, worker_devices, devices, - expected_values) - - def testInitializableIterator(self): - worker_devices, devices = self._cpu_devices() - with context.graph_mode(), self.cached_session() as sess: - dataset_fn = lambda: dataset_ops.Dataset.range(8) - device_map = values.ReplicaDeviceMap(devices) - input_workers = values.InputWorkers(device_map, worker_devices) - multi_worker_dataset = values.MultiWorkerDataset( - dataset_fn, input_workers) - multi_worker_iterator = multi_worker_dataset.make_initializable_iterator() - - sess.run(multi_worker_iterator.initializer) - self._test_iterator( - sess, multi_worker_iterator, devices, - [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]) - - # After re-initializing the iterator, should be able to iterate again. - sess.run(multi_worker_iterator.initializer) - self._test_iterator( - sess, multi_worker_iterator, devices, - [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]]) - - def testValueErrorForIterator(self): - # Incompatiable arguments. 
- d1 = "/device:GPU:0" - d2 = "/device:GPU:1" - device_map = values.ReplicaDeviceMap([d1, d2]) - input_workers = values.InputWorkers( - device_map, (("w1", (d1,)), ("w2", (d2,)))) - with self.assertRaises(ValueError): - values.MultiWorkerDataIterator([("w1", None)], input_workers) - - def testDuplicateDevices(self): - _, devices = self._cpu_devices() - devices.append("/job:worker/replica:0/task:0/device:CPU:0") - with self.assertRaises(ValueError): - _ = values.ReplicaDeviceMap(devices) - - -class InputIteratorTestBase(test.TestCase): - - def _test_iterator(self, input_type, dataset_fn, worker_device_pairs, - expected_values, sess=None, split_batch_by=None): - devices = nest.flatten([ds for _, ds in worker_device_pairs]) - device_map = values.ReplicaDeviceMap(devices) - input_workers = values.InputWorkers(device_map, worker_device_pairs) - - if input_type == "input_fn": - input_contexts = [ - distribute_lib.InputContext() for _ in worker_device_pairs] - input_fn = lambda _: dataset_fn() - iterator = values.InputFunctionIterator( - input_fn, input_workers, input_contexts) - else: - iterator = values.DatasetIterator( - dataset_fn(), input_workers, split_batch_by) - - evaluate = lambda x: sess.run(x) if sess else self.evaluate(x) - - evaluate(control_flow_ops.group(iterator.initialize())) - - for expected_value in expected_values: - next_element = iterator.get_next() - computed_value = evaluate( - [values.select_replica(r, next_element) for r in range(len(devices))]) - self.assertAllEqual(expected_value, computed_value) - - with self.assertRaises(errors.OutOfRangeError): - next_element = iterator.get_next() - evaluate([values.select_replica(r, next_element) - for r in range(len(devices))]) - - # After re-initializing the iterator, should be able to iterate again. 
- evaluate(control_flow_ops.group(iterator.initialize())) - - for expected_value in expected_values: - next_element = iterator.get_next() - computed_value = evaluate( - [values.select_replica(r, next_element) for r in range(len(devices))]) - self.assertAllEqual(expected_value, computed_value) - - -class InputIteratorSingleWorkerTest(InputIteratorTestBase, - parameterized.TestCase): - - @combinations.generate(combinations.combine( - mode=["graph", "eager"], - input_type=["input_fn", "dataset"])) - def testOneDeviceCPU(self, input_type): - worker_device_pairs = [("", ["/device:CPU:0"])] - dataset_fn = lambda: dataset_ops.Dataset.range(10) - - expected_values = [[i] for i in range(10)] - - self._test_iterator(input_type, dataset_fn, worker_device_pairs, - expected_values) - - @combinations.generate(combinations.combine( - mode=["graph", "eager"], - input_type=["input_fn", "dataset"], - required_gpus=1)) - def testTwoDevicesOneGPUOneCPU(self, input_type): - worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])] - dataset_fn = lambda: dataset_ops.Dataset.range(10) - - expected_values = [[i, i+1] for i in range(0, 10, 2)] - - self._test_iterator(input_type, dataset_fn, worker_device_pairs, - expected_values) - - @combinations.generate(combinations.combine( - mode=["graph", "eager"], - input_type=["input_fn", "dataset"], - required_gpus=1)) - def testTupleDataset(self, input_type): - worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])] - def dataset_fn(): - dataset1 = dataset_ops.Dataset.range(10) - dataset2 = dataset_ops.Dataset.range(10).map(lambda x: x**2) - return dataset_ops.Dataset.zip((dataset1, dataset2)) - - expected_values = [[(i, i**2), (i+1, (i+1)**2)] for i in range(0, 10, 2)] - - self._test_iterator(input_type, dataset_fn, worker_device_pairs, - expected_values) - - @combinations.generate(combinations.combine( - mode=["graph", "eager"], - input_type=["input_fn", "dataset"], - required_gpus=1)) - def testUnevenDatasetBatches(self, 
input_type): - worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])] - dataset_fn = lambda: dataset_ops.Dataset.range(11) - - expected_values = [[i, i+1] for i in range(0, 10, 2)] - self._test_iterator(input_type, dataset_fn, worker_device_pairs, - expected_values) - - @combinations.generate(combinations.combine( - mode=["graph", "eager"], - input_type=["dataset"], - split_batch_by=[None, 2], - required_gpus=1)) - def testBatchSplitting(self, input_type, split_batch_by): - worker_device_pairs = [("", ["/device:GPU:0", "/device:CPU:0"])] - batch_size = 10 - dataset_fn = lambda: dataset_ops.Dataset.range(100).batch(batch_size) - - updated_batch_size = ( - batch_size // split_batch_by if split_batch_by else batch_size) - expected_values = [[range(i, i+updated_batch_size), - range(i+updated_batch_size, i+2*updated_batch_size)] - for i in range(0, 100, updated_batch_size*2)] - - self._test_iterator(input_type, dataset_fn, worker_device_pairs, - expected_values, sess=None, - split_batch_by=split_batch_by) - - -class InputIteratorMultiWorkerTest( - multi_worker_test_base.MultiWorkerTestBase, InputIteratorTestBase, - parameterized.TestCase): - - def _cpu_devices(self): - return [ - ("/job:worker/replica:0/task:0", - ["/job:worker/replica:0/task:0/device:CPU:0"]), - ("/job:worker/replica:0/task:1", - ["/job:worker/replica:0/task:1/device:CPU:0"])] - - def _cpu_and_one_gpu_devices(self): - return [ - ("/job:worker/replica:0/task:0", [ - "/job:worker/replica:0/task:0/device:GPU:0", - "/job:worker/replica:0/task:0/device:CPU:0" - ]), - ("/job:worker/replica:0/task:1", [ - "/job:worker/replica:0/task:1/device:GPU:0", - "/job:worker/replica:0/task:1/device:CPU:0" - ]) - ] - - @combinations.generate(combinations.combine( - mode=["graph"], - input_type=["input_fn", "dataset"])) - def testOneDevicePerWorker(self, input_type): - worker_devices = self._cpu_devices() - with context.graph_mode(), self.cached_session() as sess: - dataset_fn = lambda: 
dataset_ops.Dataset.range(4) - self._test_iterator(input_type, dataset_fn, worker_devices, - [[0, 0], [1, 1], [2, 2], [3, 3]], sess) - - @combinations.generate(combinations.combine( - mode=["graph"], - input_type=["input_fn", "dataset"], - required_gpus=1)) - def testTwoDevicesPerWorker(self, input_type): - worker_devices = self._cpu_and_one_gpu_devices() - with context.graph_mode(), self.cached_session() as sess: - dataset_fn = lambda: dataset_ops.Dataset.range(4) - self._test_iterator(input_type, dataset_fn, worker_devices, - [[0, 1, 0, 1], [2, 3, 2, 3]], sess) - - @combinations.generate(combinations.combine( - mode=["graph"], - input_type=["input_fn", "dataset"])) - def testTupleDataset(self, input_type): - worker_devices = self._cpu_devices() - with context.graph_mode(), self.cached_session() as sess: - def dataset_fn(): - dataset1 = dataset_ops.Dataset.range(4) - dataset2 = dataset_ops.Dataset.range(4).map(lambda x: x**2) - return dataset_ops.Dataset.zip((dataset1, dataset2)) - - expected_values = [[(i, i**2), (i, i**2)] for i in range(0, 4)] - self._test_iterator(input_type, dataset_fn, worker_devices, - expected_values, sess) - - -class SplitDatasetBatchTest(test.TestCase): - - def testBatchDataset(self): - dataset = dataset_ops.Dataset.range(100).batch(20) - split_batch_by = 2 - result_dataset = values._split_dataset_batch(dataset, split_batch_by) - expected_values = [range(i, i+10) for i in range(0, 100, 10)] - result = [self.evaluate(el) for el in result_dataset] - self.assertAllEqual(expected_values, result) - - def testMapAndBatchDataset(self): - dataset = dataset_ops.Dataset.range(100) - dataset = dataset.apply(batching.map_and_batch(lambda x: x, 20)) - split_batch_by = 2 - result_dataset = values._split_dataset_batch(dataset, split_batch_by) - expected_values = [range(i, i+10) for i in range(0, 100, 10)] - result = [self.evaluate(el) for el in result_dataset] - self.assertAllEqual(expected_values, result) - - def testPrefetchDataset(self): - dataset = 
dataset_ops.Dataset.range(100).batch(20).prefetch(1) - split_batch_by = 2 - result_dataset = values._split_dataset_batch(dataset, split_batch_by) - expected_values = [range(i, i+10) for i in range(0, 100, 10)] - result = [self.evaluate(el) for el in result_dataset] - self.assertAllEqual(expected_values, result) - - class MirroredVariableTest(test.TestCase, parameterized.TestCase): config = config_pb2.ConfigProto() diff --git a/tensorflow/python/distribute/BUILD b/tensorflow/python/distribute/BUILD index 02957b2fef..987fb00454 100644 --- a/tensorflow/python/distribute/BUILD +++ b/tensorflow/python/distribute/BUILD @@ -219,6 +219,7 @@ py_library( ":cross_device_ops", ":device_util", ":distribute_lib", + ":input_lib", ":multi_worker_util", ":reduce_util", ":shared_variable_creator", @@ -253,6 +254,23 @@ py_library( ], ) +py_library( + name = "input_lib", + srcs = ["input_lib.py"], + deps = [ + ":device_util", + ":distribute_lib", + ":input_ops", + ":values", + "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework_ops", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:multi_device_iterator_ops", + "//tensorflow/python/eager:context", + ], +) + py_library( name = "input_ops", srcs = ["input_ops.py"], @@ -348,14 +366,12 @@ py_library( deps = [ ":device_util", ":distribute_lib", - ":input_ops", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", "//tensorflow/python:framework_ops", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:training", "//tensorflow/python:util", - "//tensorflow/python/data/ops:multi_device_iterator_ops", "//tensorflow/python/eager:context", "//tensorflow/python/training/checkpointable:base", "@six_archive//:six", diff --git a/tensorflow/python/distribute/input_lib.py b/tensorflow/python/distribute/input_lib.py new file mode 100644 index 0000000000..cbe6518e5c --- /dev/null +++ b/tensorflow/python/distribute/input_lib.py @@ -0,0 +1,707 @@ +# 
Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Various classes representing distributed inputs.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.experimental.ops import batching +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import multi_device_iterator_ops +from tensorflow.python.distribute import device_util +from tensorflow.python.distribute import distribution_strategy_context +from tensorflow.python.distribute import input_ops +from tensorflow.python.distribute import values +from tensorflow.python.eager import context +from tensorflow.python.framework import device as tf_device +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.util import nest + + +class InputWorkers(object): + """A 1-to-many mapping from input worker devices to compute devices.""" + + def __init__(self, device_map, worker_device_pairs=None, logical_device=0): + """Initialize an `InputWorkers` object. + + Args: + device_map: A `DeviceMap` with the computation devices fed by the + input workers. 
+ worker_device_pairs: A sequence of pairs: + `(input device, a tuple of compute devices fed by that input device)`. + logical_device: The logical device of `device_map` to feed. + """ + self._device_map = device_map + self._logical_device = logical_device + if worker_device_pairs is None: + worker_device_pairs = (( + device_util.canonicalize("/device:CPU:0"), + device_map.logical_to_actual_devices(logical_device)),) + self._input_worker_devices = tuple(d for d, _ in worker_device_pairs) + self._fed_devices = tuple(tuple(device_util.canonicalize(d) for d in f) + for _, f in worker_device_pairs) + flattened = tuple(d for l in self._fed_devices for d in l) + assert (flattened == + device_map.logical_to_actual_devices(logical_device)), ( + "flattened: %s logical device %d: %s" % + (flattened, logical_device, + device_map.logical_to_actual_devices(logical_device))) + + @property + def device_map(self): + return self._device_map + + @property + def logical_device(self): + return self._logical_device + + @property + def num_workers(self): + return len(self._input_worker_devices) + + @property + def worker_devices(self): + return self._input_worker_devices + + def compute_devices_for_worker(self, worker_index): + return self._fed_devices[worker_index] + + def __repr__(self): + devices = self.worker_devices + debug_repr = ",\n".join(" %d %s: %s" % + (i, devices[i], self._fed_devices[i]) + for i in range(len(devices))) + return "%s:{\n%s\n device_map: %s}" % ( + self.__class__.__name__, debug_repr, self._device_map) + + +class PerReplicaDataIterator(object): + """An iterator (like `tf.data.Iterator`) into a `PerReplicaDataset`.""" + + def __init__(self, iterator, input_workers, worker_index, prefetch_on_device): + assert isinstance(input_workers, InputWorkers) + self._iterator = iterator + self._input_workers = input_workers + self._worker_index = worker_index + self._prefetch_on_device = prefetch_on_device + + @property + def initializer(self): + return 
self._iterator.initializer + + def get_next_as_list(self, name=None): + """Scatter the input across devices.""" + if self._prefetch_on_device: + data_list = self._iterator.get_next() + else: + batch = self._iterator.get_next(name=name) + data_list = [] + def get_ith(i): + return lambda x: x[i] + + devices = self._input_workers.compute_devices_for_worker( + self._worker_index) + for i, d in enumerate(devices): + v = nest.map_structure(get_ith(i), batch) + if context.executing_eagerly(): + with ops.device(d): + v = nest.map_structure(array_ops.identity, v) + data_list.append(v) + + return data_list + + def get_next(self, name=None): + assert self._input_workers.num_workers == 1 + data_list = self.get_next_as_list(name) + return values.regroup(self._input_workers.device_map, data_list) + + @property + def output_classes(self): + return self._iterator.output_classes + + @property + def output_shapes(self): + return self._iterator.output_shapes + + @property + def output_types(self): + return self._iterator.output_types + + +class PerReplicaDataset(object): + """Like `tf.data.Dataset` split devices, producing `PerReplica` data.""" + + def __init__(self, dataset, input_workers, worker_index, + prefetch_on_device=None): + assert isinstance(input_workers, InputWorkers) + assert worker_index is not None + assert worker_index is not True # pylint: disable=g-bool-id-comparison + assert worker_index is not False # pylint: disable=g-bool-id-comparison + self._input_workers = input_workers + self._worker_index = worker_index + + # Default to using prefetching, unless specified. + self._prefetch_on_device = prefetch_on_device + if self._prefetch_on_device is None: + self._prefetch_on_device = True + + self._dataset = dataset + if not self._prefetch_on_device: + # TODO(priyag): If dropping remainder is not appropriate, find another + # approach to distributing the dataset when not possible to divide evenly. + # Possibly not an issue when we start using PartitionedDataset. 
+ num_replicas = len( + self._input_workers.compute_devices_for_worker(self._worker_index)) + self._dataset = self._dataset.batch(num_replicas, drop_remainder=True) + else: + self._replica_devices = self._input_workers.compute_devices_for_worker( + self._worker_index) + + def make_one_shot_iterator(self): + """Get a one time use iterator for the distributed PerReplicaDataset.""" + # Graph mode with one shot iterator is disabled. + if not context.executing_eagerly(): + raise ValueError("Cannot create a one shot iterator. Please use " + "`make_initializable_iterator()` instead.") + if self._prefetch_on_device: + dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator( + self._dataset, self._replica_devices) + else: + dataset_iterator = dataset_ops.make_one_shot_iterator(self._dataset) + return PerReplicaDataIterator( + dataset_iterator, + self._input_workers, + self._worker_index, + prefetch_on_device=self._prefetch_on_device) + + def make_initializable_iterator(self): + """Get an initializable iterator for the distributed PerReplicaDataset.""" + # Eager mode generates already initialized iterators. Hence we cannot create + # an initializable iterator. + if context.executing_eagerly(): + raise ValueError("Cannot create initializable iterator in Eager mode. " + "Please use `make_one_shot_iterator` instead.") + if self._prefetch_on_device: + dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator( + self._dataset, self._replica_devices) + else: + dataset_iterator = dataset_ops.make_initializable_iterator(self._dataset) + return PerReplicaDataIterator( + dataset_iterator, self._input_workers, self._worker_index, + prefetch_on_device=self._prefetch_on_device) + + +class MultiWorkerDataIterator(object): + """An iterator (like `tf.data.Iterator`) into a `MultiWorkerDataset`.""" + + def __init__(self, iterators, input_workers): + """Initialize the `MultiWorkerDataIterator` object. + + Args: + iterators: a list of worker, iterator pairs. 
+ input_workers: an `InputWorkers` object. + + Raises: + ValueError: if iterators and input_workers are not compatible. + """ + assert isinstance(input_workers, InputWorkers) + workers = tuple(d for d, _ in iterators) + if workers != input_workers.worker_devices: + raise ValueError("iterators and input_workers are not compatible. " + "iterator workers: %r input_workers devices: %r" % + (workers, input_workers.worker_devices)) + self._iterators = tuple(i for _, i in iterators) + self._input_workers = input_workers + + @property + def initializer(self): + return control_flow_ops.group( + tuple(iterator.initializer for iterator in self._iterators)) + + def get_iterator(self, worker): + for i, w in enumerate(self._input_workers.worker_devices): + if worker == w: + return self._iterators[i] + return None + + @property + def output_shapes(self): + return self._iterators[0].output_shapes + + @property + def output_types(self): + return self._iterators[0].output_types + + def get_next(self, name=None): + """Scatter the input across hosts and devices.""" + replicas = [] + for worker, iterator in zip(self._input_workers.worker_devices, + self._iterators): + if name is not None: + d = tf_device.DeviceSpec.from_string(worker) + new_name = "%s_%s_%d" % (name, d.job, d.task) + else: + new_name = None + with ops.device(worker): + data_per_worker = iterator.get_next_as_list(name=new_name) + # Append to replicas to get a flat list of values indexed by replica. + replicas.extend(data_per_worker) + + return values.regroup(self._input_workers.device_map, replicas) + + +class MultiWorkerDataset(object): + """Like a `tf.data.Dataset` that distributes data to different workers. + + Each worker gets one shard of the input dataset. This currently does not work + in eager mode. + """ + + def __init__(self, dataset_fn, input_workers, prefetch_on_device=None, + auto_shard=False): + """Initialize the MultiWorkerDataset object. 
+ + Args: + dataset_fn: a function or a list of functions that returns a + `tf.data.Dataset`. + input_workers: an `InputWorkers` object. + prefetch_on_device: whether to prefetch to devices. + auto_shard: whether to auto-shard the dataset. + """ + assert isinstance(input_workers, InputWorkers) + if isinstance(dataset_fn, (list, tuple)): + if len(dataset_fn) != input_workers.num_workers: + raise ValueError("If `dataset_fn` is a list, it must have one entry " + "per worker") + # TODO(rohanj): b/120673685 to track re-enabling auto sharding. + if auto_shard: + raise ValueError("Currently autosharding is not supported.") + self._input_workers = input_workers + self._datasets = [] + # TODO(yuefengz, priyag): support different set of jobs for input + # processing. + for i, worker in enumerate(input_workers.worker_devices): + with ops.device(worker): + if isinstance(dataset_fn, (list, tuple)): + worker_input = dataset_fn[i]() + else: + worker_input = dataset_fn() + dataset = PerReplicaDataset(worker_input, input_workers, i, + prefetch_on_device=prefetch_on_device) + self._datasets.append((worker, dataset)) + + def make_one_shot_iterator(self): + iterators = [] + for worker, dataset in self._datasets: + with ops.device(worker): + iterators.append((worker, dataset_ops.make_one_shot_iterator(dataset))) + return MultiWorkerDataIterator(iterators, self._input_workers) + + def make_initializable_iterator(self): + iterators = [] + for worker, dataset in self._datasets: + with ops.device(worker): + iterators.append( + (worker, dataset_ops.make_initializable_iterator(dataset))) + return MultiWorkerDataIterator(iterators, self._input_workers) + + +class InputIterator(object): + """An input iterator, intended to be passed to `DistributionStrategy.run`.""" + + def get_next(self): + """Returns the next inputs for all replicas.""" + raise NotImplementedError("must be implemented in descendants") + + def initialize(self): + """Initialize the underlying input dataset, when applicable. 
+ + In eager mode, this will create a new iterator and return it. + In graph mode, this will initialize the same underlying iterator(s). + + Users are required to call this if + - This iterator was returned from a call to `make_input_fn_iterator` with an + input function that returns a dataset. + - Or this iterator was returned from a call to `make_dataset_iterator`. + + Returns: + A list of initialization ops to be executed. + """ + raise NotImplementedError("must be implemented in descendants") + + +class InputIteratorImpl(InputIterator): + """Common implementation for all input iterators.""" + + def __init__(self, input_workers, iterators): + assert isinstance(input_workers, InputWorkers) + if not input_workers.worker_devices: + raise ValueError("Should have at least one worker for input iterator.") + + self._iterators = iterators + self._input_workers = input_workers + + def get_next(self, name=None): + """Returns the next input from the iterator for all replicas.""" + replicas = [] + for i, worker in enumerate(self._input_workers.worker_devices): + if name is not None: + d = tf_device.DeviceSpec.from_string(worker) + new_name = "%s_%s_%d" % (name, d.job, d.task) + else: + new_name = None + with ops.device(worker): + # Make `replicas` a flat list of values across all replicas. + replicas.extend(self._iterators[i].get_next_as_list(new_name)) + + return values.regroup(self._input_workers.device_map, replicas) + + def initialize(self): + """Initialze underlying iterators. + + Returns: + A list of any initializer ops that should be run. + """ + init_ops = [] + for it in self._iterators: + init_ops.extend(it.initialize()) + return init_ops + + # TODO(priyag): Remove when we switch to using `MultiDeviceIterator` for TPUs. + @property + def output_classes(self): + return self._iterators[0].output_classes + + # TODO(priyag): Remove when we switch to using `MultiDeviceIterator` for TPUs. 
+ @property + def output_shapes(self): + return self._iterators[0].output_shapes + + # TODO(priyag): Remove when we switch to using `MultiDeviceIterator` for TPUs. + @property + def output_types(self): + return self._iterators[0].output_types + + # TODO(priyag): Remove when we switch to using `MultiDeviceIterator` for TPUs. + def get_iterator(self, worker): + for i, w in enumerate(self._input_workers.worker_devices): + if worker == w: + return self._iterators[i] + return None + + +class InputFunctionIterator(InputIteratorImpl): + """Iterator created from input function.""" + + def __init__(self, input_fn, input_workers, input_contexts): + """Make an iterator for input provided via an input function. + + Currently implements PER_WORKER mode, in which the `input_fn` is called + once on each worker. + + TODO(priyag): Add other replication modes. + TODO(priyag): Allow taking input function that returns a callable that + returns nest of tensors. + + Args: + input_fn: Input function that returns a `tf.data.Dataset` object. + input_workers: an `InputWorkers` object. + input_contexts: A list of `InputContext` instances to be passed to call(s) + to `input_fn`. Length and order should match worker order in + `worker_device_pairs`. 
+ """ + assert isinstance(input_workers, InputWorkers) + if input_workers.num_workers != len(input_contexts): + raise ValueError( + "Number of input workers (%d) is not same as number of " + "input_contexts (%d)" % + (input_workers.num_workers, len(input_contexts))) + + iterators = [] + for i, ctx in enumerate(input_contexts): + worker = input_workers.worker_devices[i] + with ops.device(worker): + result = input_fn(ctx) + if not isinstance(result, dataset_ops.DatasetV2): + raise ValueError("input_fn must return a tf.data.Dataset.") + devices = input_workers.compute_devices_for_worker(i) + iterator = _SingleWorkerDatasetIterator(result, worker, devices) + iterators.append(iterator) + + super(InputFunctionIterator, self).__init__(input_workers, iterators) + + +class DatasetIterator(InputIteratorImpl): + """Iterator created from input dataset.""" + + def __init__(self, dataset, input_workers, split_batch_by=None): + """Make an iterator for the dataset on given devices. + + If `split_batch_by` is not None, we "split" each batch of the + dataset by `split_batch_by` value. To achieve this, we first unbatch the + input dataset and then rebatch it with the per replica batch size that is + calculated using `global_batch_size // split_batch_by`. + The currently supported datasets are as follows: + `dataset.batch()` is the last operation on the dataset OR + `dataset.apply(map_and_batch)` is the last operation on the dataset OR + `dataset.batch().prefetch()` are the last 2 operations on the dataset OR + `dataset.apply(map_and_batch).prefetch()` are the last 2 operations. + + TODO(priyag): Support multi worker / host cases properly by cloning + and sharding the dataset on each worker. Current setup will only work in + some cases, such as in-graph multi worker GPU case. If the input pipeline + has random shuffling (with a different seed on each worker), each worker + will see random input from the same overall dataset in each step. 
Otherwise, + each worker will see the same input in each step. + + Args: + dataset: `tf.data.Dataset` that will be used as the input source. + input_workers: an `InputWorkers` object. + split_batch_by: Optional integer. If present, we "split" each batch of the + dataset by `split_batch_by` value. + """ + assert isinstance(input_workers, InputWorkers) + if split_batch_by: + dataset = _split_dataset_batch(dataset, split_batch_by) + + iterators = [] + for i, worker in enumerate(input_workers.worker_devices): + with ops.device(worker): + worker_devices = input_workers.compute_devices_for_worker(i) + cloned_dataset = dataset + if not context.executing_eagerly(): + cloned_dataset = input_ops._clone_dataset(dataset) # pylint: disable=protected-access + iterator = _SingleWorkerDatasetIterator(cloned_dataset, worker, + worker_devices) + iterators.append(iterator) + + super(DatasetIterator, self).__init__(input_workers, iterators) + + +class _SingleWorkerDatasetIterator(object): + """Iterator for a single `tf.data.Dataset`.""" + + def __init__(self, dataset, worker, devices): + """Create iterator for the `dataset` to fetch data to worker's `devices` . + + `MultiDeviceIterator` is used to prefetch input to the devices on the + given worker. + + Args: + dataset: A `tf.data.Dataset` instance. + worker: Worker on which ops should be created. + devices: Distribute data from `dataset` to these devices. + """ + self._dataset = dataset + self._worker = worker + self._devices = devices + self._make_iterator() + + def _make_iterator(self): + """Make appropriate iterator on the dataset.""" + with ops.device(self._worker): + self._iterator = multi_device_iterator_ops.MultiDeviceIterator( + self._dataset, self._devices) + + def get_next_as_list(self, name=None): + """Get next element from the underlying iterator.""" + del name + with ops.device(self._worker): + data_list = self._iterator.get_next() + return data_list + + def initialize(self): + """Initialze underlying iterator. 
+ + In eager execution, this simply recreates the underlying iterator. + In graph execution, it returns the initializer ops for the underlying + iterator. + + Returns: + A list of any initializer ops that should be run. + """ + if context.executing_eagerly(): + self._make_iterator() + return [] + else: + return [self._iterator.initializer] + + @property + def output_classes(self): + return self._iterator.output_classes + + @property + def output_shapes(self): + return self._iterator.output_shapes + + @property + def output_types(self): + return self._iterator.output_types + + +def _split_dataset_batch(dataset, split_batch_by): + """Divide a batch-ed dataset's batches into smaller batches.""" + # TODO(sourabhbajaj): Remove this in lieu of distributed datasets + # pylint: disable=protected-access + def _get_batch_dataset(d): + """Get the underlying batch dataset from the dataset object.""" + if isinstance(d, dataset_ops.DatasetV1Adapter): + d = d._dataset + + if isinstance(d, (dataset_ops.BatchDataset, batching._MapAndBatchDataset)): + return d + elif isinstance(d, dataset_ops.PrefetchDataset): + return _get_batch_dataset(d._input_dataset) + raise ValueError( + "Unable to get batched dataset from the input dataset. `batch` " + "`map_and_batch` need to be the last operations on the dataset. 
" + "The batch operations can be followed by a prefetch.") + + batched_dataset = _get_batch_dataset(dataset) + if isinstance(batched_dataset, dataset_ops.BatchDataset): + batch_size = batched_dataset._batch_size + drop_remainder = batched_dataset._drop_remainder + elif isinstance(batched_dataset, batching._MapAndBatchDataset): + batch_size = batched_dataset._batch_size_t + drop_remainder = batched_dataset._drop_remainder_t + + prefetch_buffer = None + if isinstance(dataset, dataset_ops.PrefetchDataset): + prefetch_buffer = dataset._buffer_size + elif (isinstance(dataset, dataset_ops.DatasetV1Adapter) + and isinstance(dataset._dataset, dataset_ops.PrefetchDataset)): + prefetch_buffer = dataset._dataset._buffer_size + # pylint: enable=protected-access + + if tensor_util.is_tensor(batch_size): + batch_size = tensor_util.constant_value(batch_size) + + if tensor_util.is_tensor(drop_remainder): + drop_remainder = tensor_util.constant_value(drop_remainder) + + if batch_size % split_batch_by: + raise ValueError( + "Batch size %s cannot be sharded evenly across replicas %s" % ( + batch_size, split_batch_by)) + new_batch_size = batch_size // split_batch_by + + dataset = dataset.apply(batching.unbatch()) + dataset = dataset.batch(new_batch_size, drop_remainder=drop_remainder) + if prefetch_buffer is not None: + dataset = dataset.prefetch(prefetch_buffer) + return dataset + + +class MultiStepContext(object): + """A context object that can be used to capture things when running steps. + + This context object is useful when running multiple steps at a time using the + `experimental_run_steps_on_iterator` API. For e.g. it allows the user's step + function to specify which outputs to emit at what frequency. Currently it + supports capturing output from the last step, as well as capturing non tensor + outputs. In the future it will be augmented to support other use cases such + as output each N steps. + """ + + def __init__(self): + """Initialize an output context. 
+ + Returns: + A context object. + """ + self._last_step_outputs = {} + self._last_step_outputs_reduce_ops = {} + self._non_tensor_outputs = {} + + @property + def last_step_outputs(self): + """A dictionary consisting of outputs to be captured on last step. + + Keys in the dictionary are names of tensors to be captured, as specified + when `set_last_step_output` is called. + Values in the dictionary are the tensors themselves. If + `set_last_step_output` was called with a `reduce_op` for this output, + then the value is the reduced value. + + Returns: + A dictionary with last step outputs. + """ + return self._last_step_outputs + + def _set_last_step_outputs(self, outputs): + """Replace the entire dictionary of last step outputs.""" + if not isinstance(outputs, dict): + raise ValueError("Need a dictionary to set last_step_outputs.") + self._last_step_outputs = outputs + + def set_last_step_output(self, name, output, reduce_op=None): + """Set `output` with `name` to be outputted from the last step. + + Args: + name: String, name to identify the output. Doesn't need to match tensor + name. + output: The tensors that should be outputted with `name`. See below for + actual types supported. + reduce_op: Reduction method to use to reduce outputs from multiple + replicas. Required if `set_last_step_output` is called in a replica + context. Optional in cross_replica_context. + When present, the outputs from all the replicas are reduced using the + current distribution strategy's `reduce` method. Hence, the type of + `output` must be what's supported by the corresponding `reduce` method. + For e.g. if using MirroredStrategy and reduction is set, output + must be a `PerReplica` value. + The reduce method is also recorded in a dictionary + `_last_step_outputs_reduce_ops` for later interpreting of the + outputs as already reduced or not. 
+ """ + if distribution_strategy_context.in_cross_replica_context(): + self._last_step_outputs_reduce_ops[name] = reduce_op + if reduce_op is None: + self._last_step_outputs[name] = output + else: + distribution = distribution_strategy_context.get_distribution_strategy() + self._last_step_outputs[name] = distribution.reduce(reduce_op, output) + else: + assert reduce_op is not None + def merge_fn(distribution, value): + self._last_step_outputs[name] = distribution.reduce(reduce_op, value) + # Setting this inside the `merge_fn` because all replicas share the same + # context object, so it's more robust to set it only once (even if all + # the replicas are trying to set the same value). + self._last_step_outputs_reduce_ops[name] = reduce_op + + distribution_strategy_context.get_replica_context().merge_call( + merge_fn, args=(output,)) + + @property + def non_tensor_outputs(self): + """A dictionary consisting of any non tensor outputs to be captured.""" + return self._non_tensor_outputs + + def set_non_tensor_output(self, name, output): + """Set `output` with `name` to be captured as a non tensor output.""" + if distribution_strategy_context.in_cross_replica_context(): + self._non_tensor_outputs[name] = output + else: + def merge_fn(distribution, value): + # NOTE(priyag): For non tensor outputs, we simply return all the values + # in a list as reduction doesn't make sense on non tensors. 
+ self._non_tensor_outputs[name] = distribution.unwrap(value) + distribution_strategy_context.get_replica_context().merge_call( + merge_fn, args=(output,)) diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py index 601eafbb5e..37b493d0f7 100644 --- a/tensorflow/python/distribute/mirrored_strategy.py +++ b/tensorflow/python/distribute/mirrored_strategy.py @@ -27,6 +27,7 @@ from tensorflow.python import pywrap_tensorflow from tensorflow.python.distribute import cross_device_ops as cross_device_ops_lib from tensorflow.python.distribute import device_util from tensorflow.python.distribute import distribute_lib +from tensorflow.python.distribute import input_lib from tensorflow.python.distribute import multi_worker_util from tensorflow.python.distribute import reduce_util from tensorflow.python.distribute import shared_variable_creator @@ -456,7 +457,7 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended): "No duplicates allowed in `devices` argument: %s" % devices) # TODO(josh11b): Require at least 2 devices? 
self._device_map = values.ReplicaDeviceMap(devices) - self._input_workers = values.InputWorkers(self._device_map) + self._input_workers = input_lib.InputWorkers(self._device_map) self._inferred_cross_device_ops = cross_device_ops_lib.choose_the_best( devices) @@ -489,7 +490,8 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended): self._default_device = workers[0] self._device_map = values.ReplicaDeviceMap(devices) - self._input_workers = values.InputWorkers(self._device_map, worker_devices) + self._input_workers = input_lib.InputWorkers( + self._device_map, worker_devices) self._inferred_cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce( workers, _infer_num_gpus_per_worker(devices)) @@ -543,16 +545,16 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended): def _distribute_dataset(self, dataset_fn): if self._local_mode: worker_index = 0 - return values.PerReplicaDataset( + return input_lib.PerReplicaDataset( self._call_dataset_fn(dataset_fn), self._input_workers, worker_index) else: - return values.MultiWorkerDataset( + return input_lib.MultiWorkerDataset( functools.partial(self._call_dataset_fn, dataset_fn), self._input_workers, auto_shard=False) def _make_dataset_iterator(self, dataset): - return values.DatasetIterator( + return input_lib.DatasetIterator( dataset, self._input_workers, self._num_replicas_in_sync) def _make_input_fn_iterator( @@ -566,7 +568,7 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended): num_input_pipelines=num_workers, input_pipeline_id=i, num_replicas_in_sync=self._num_replicas_in_sync)) - return values.InputFunctionIterator( + return input_lib.InputFunctionIterator( input_fn, self._input_workers, input_contexts) # TODO(priyag): Deal with OutOfRange errors once b/111349762 is fixed. 
@@ -576,7 +578,7 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended): initial_loop_values = {} initial_loop_values = nest.flatten(initial_loop_values) - ctx = values.MultiStepContext() + ctx = input_lib.MultiStepContext() def body(i, *args): """A wrapper around `fn` to create the while loop body.""" del args diff --git a/tensorflow/python/distribute/values.py b/tensorflow/python/distribute/values.py index 1f5077a75a..a9dcabdab6 100644 --- a/tensorflow/python/distribute/values.py +++ b/tensorflow/python/distribute/values.py @@ -23,17 +23,12 @@ import contextlib import weakref import six -from tensorflow.python.data.experimental.ops import batching -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.ops import multi_device_iterator_ops from tensorflow.python.distribute import device_util from tensorflow.python.distribute import distribute_lib from tensorflow.python.distribute import distribution_strategy_context -from tensorflow.python.distribute import input_ops from tensorflow.python.distribute import reduce_util from tensorflow.python.eager import context from tensorflow.python.eager import tape -from tensorflow.python.framework import device as tf_device from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops @@ -1409,679 +1404,6 @@ def update_regroup(extended, device_map, updates, group): return nest.pack_sequence_as(regrouped, grouped_flat) -class InputWorkers(object): - """A 1-to-many mapping from input worker devices to compute devices.""" - - def __init__(self, device_map, worker_device_pairs=None, logical_device=0): - """Initialize an `InputWorkers` object. - - Args: - device_map: A `DeviceMap` with the computation devices fed by the - input workers. - worker_device_pairs: A sequence of pairs: - `(input device, a tuple of compute devices fed by that input device)`. - logical_device: The logical device of `device_map` to feed. 
- """ - self._device_map = device_map - self._logical_device = logical_device - if worker_device_pairs is None: - worker_device_pairs = (( - device_util.canonicalize("/device:CPU:0"), - device_map.logical_to_actual_devices(logical_device)),) - self._input_worker_devices = tuple(d for d, _ in worker_device_pairs) - self._fed_devices = tuple(tuple(device_util.canonicalize(d) for d in f) - for _, f in worker_device_pairs) - flattened = tuple(d for l in self._fed_devices for d in l) - assert (flattened == - device_map.logical_to_actual_devices(logical_device)), ( - "flattened: %s logical device %d: %s" % - (flattened, logical_device, - device_map.logical_to_actual_devices(logical_device))) - - @property - def device_map(self): - return self._device_map - - @property - def logical_device(self): - return self._logical_device - - @property - def num_workers(self): - return len(self._input_worker_devices) - - @property - def worker_devices(self): - return self._input_worker_devices - - def compute_devices_for_worker(self, worker_index): - return self._fed_devices[worker_index] - - def __repr__(self): - devices = self.worker_devices - debug_repr = ",\n".join(" %d %s: %s" % - (i, devices[i], self._fed_devices[i]) - for i in range(len(devices))) - return "%s:{\n%s\n device_map: %s}" % ( - self.__class__.__name__, debug_repr, self._device_map) - - -class PerReplicaDataIterator(object): - """An iterator (like `tf.data.Iterator`) into a `PerReplicaDataset`.""" - - def __init__(self, iterator, input_workers, worker_index, prefetch_on_device): - assert isinstance(input_workers, InputWorkers) - self._iterator = iterator - self._input_workers = input_workers - self._worker_index = worker_index - self._prefetch_on_device = prefetch_on_device - - @property - def initializer(self): - return self._iterator.initializer - - def get_next_as_list(self, name=None): - """Scatter the input across devices.""" - if self._prefetch_on_device: - data_list = self._iterator.get_next() - else: - batch 
= self._iterator.get_next(name=name) - data_list = [] - def get_ith(i): - return lambda x: x[i] - - devices = self._input_workers.compute_devices_for_worker( - self._worker_index) - for i, d in enumerate(devices): - v = nest.map_structure(get_ith(i), batch) - if context.executing_eagerly(): - with ops.device(d): - v = nest.map_structure(array_ops.identity, v) - data_list.append(v) - - return data_list - - def get_next(self, name=None): - assert self._input_workers.num_workers == 1 - data_list = self.get_next_as_list(name) - return regroup(self._input_workers.device_map, data_list) - - @property - def output_classes(self): - return self._iterator.output_classes - - @property - def output_shapes(self): - return self._iterator.output_shapes - - @property - def output_types(self): - return self._iterator.output_types - - -class PerReplicaDataset(object): - """Like `tf.data.Dataset` split devices, producing `PerReplica` data.""" - - def __init__(self, dataset, input_workers, worker_index, - prefetch_on_device=None): - assert isinstance(input_workers, InputWorkers) - assert worker_index is not None - assert worker_index is not True - assert worker_index is not False - self._input_workers = input_workers - self._worker_index = worker_index - - # Default to using prefetching, unless specified. - self._prefetch_on_device = prefetch_on_device - if self._prefetch_on_device is None: - self._prefetch_on_device = True - - self._dataset = dataset - if not self._prefetch_on_device: - # TODO(priyag): If dropping remainder is not appropriate, find another - # approach to distributing the dataset when not possible to divide evenly. - # Possibly not an issue when we start using PartitionedDataset. 
- num_replicas = len( - self._input_workers.compute_devices_for_worker(self._worker_index)) - self._dataset = self._dataset.batch(num_replicas, drop_remainder=True) - else: - self._replica_devices = self._input_workers.compute_devices_for_worker( - self._worker_index) - - def make_one_shot_iterator(self): - """Get a one time use iterator for the distributed PerReplicaDataset.""" - # Graph mode with one shot iterator is disabled. - if not context.executing_eagerly(): - raise ValueError("Cannot create a one shot iterator. Please use " - "`make_initializable_iterator()` instead.") - if self._prefetch_on_device: - dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator( - self._dataset, self._replica_devices) - else: - dataset_iterator = dataset_ops.make_one_shot_iterator(self._dataset) - return PerReplicaDataIterator( - dataset_iterator, - self._input_workers, - self._worker_index, - prefetch_on_device=self._prefetch_on_device) - - def make_initializable_iterator(self): - """Get an initializable iterator for the distributed PerReplicaDataset.""" - # Eager mode generates already initialized iterators. Hence we cannot create - # an initializable iterator. - if context.executing_eagerly(): - raise ValueError("Cannot create initializable iterator in Eager mode. " - "Please use `make_one_shot_iterator` instead.") - if self._prefetch_on_device: - dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator( - self._dataset, self._replica_devices) - else: - dataset_iterator = dataset_ops.make_initializable_iterator(self._dataset) - return PerReplicaDataIterator( - dataset_iterator, self._input_workers, self._worker_index, - prefetch_on_device=self._prefetch_on_device) - - -class MultiWorkerDataIterator(object): - """An iterator (like `tf.data.Iterator`) into a `MultiWorkerDataset`.""" - - def __init__(self, iterators, input_workers): - """Initialize the `MultiWorkerDataIterator` object. - - Args: - iterators: a list of worker, iterator pairs. 
- input_workers: an `InputWorkers` object. - - Raises: - ValueError: if iterators and input_workers are not compatible. - """ - assert isinstance(input_workers, InputWorkers) - workers = tuple(d for d, _ in iterators) - if workers != input_workers.worker_devices: - raise ValueError("iterators and input_workers are not compatible. " - "iterator workers: %r input_workers devices: %r" % - (workers, input_workers.worker_devices)) - self._iterators = tuple(i for _, i in iterators) - self._input_workers = input_workers - - @property - def initializer(self): - return control_flow_ops.group( - tuple(iterator.initializer for iterator in self._iterators)) - - def get_iterator(self, worker): - for i, w in enumerate(self._input_workers.worker_devices): - if worker == w: - return self._iterators[i] - return None - - @property - def output_shapes(self): - return self._iterators[0].output_shapes - - @property - def output_types(self): - return self._iterators[0].output_types - - def get_next(self, name=None): - """Scatter the input across hosts and devices.""" - replicas = [] - for worker, iterator in zip(self._input_workers.worker_devices, - self._iterators): - if name is not None: - d = tf_device.DeviceSpec.from_string(worker) - new_name = "%s_%s_%d" % (name, d.job, d.task) - else: - new_name = None - with ops.device(worker): - data_per_worker = iterator.get_next_as_list(name=new_name) - # Append to replicas to get a flat list of values indexed by replica. - replicas.extend(data_per_worker) - - return regroup(self._input_workers.device_map, replicas) - - -class MultiWorkerDataset(object): - """Like a `tf.data.Dataset` that distributes data to different workers. - - Each worker gets one shard of the input dataset. This currently does not work - in eager mode. - """ - - def __init__(self, dataset_fn, input_workers, prefetch_on_device=None, - auto_shard=False): - """Initialize the MultiWorkerDataset object. 
- - Args: - dataset_fn: a function or a list of functions that returns a - `tf.data.Dataset`. - input_workers: an `InputWorkers` object. - prefetch_on_device: whether to prefetch to devices. - auto_shard: whether to auto-shard the dataset. - """ - assert isinstance(input_workers, InputWorkers) - if isinstance(dataset_fn, (list, tuple)): - if len(dataset_fn) != input_workers.num_workers: - raise ValueError("If `dataset_fn` is a list, it must have one entry " - "per worker") - # TODO(rohanj): b/120673685 to track re-enabling auto sharding. - if auto_shard: - raise ValueError("Currently autosharding is not supported.") - self._input_workers = input_workers - self._datasets = [] - # TODO(yuefengz, priyag): support different set of jobs for input - # processing. - for i, worker in enumerate(input_workers.worker_devices): - with ops.device(worker): - if isinstance(dataset_fn, (list, tuple)): - worker_input = dataset_fn[i]() - else: - worker_input = dataset_fn() - dataset = PerReplicaDataset(worker_input, input_workers, i, - prefetch_on_device=prefetch_on_device) - self._datasets.append((worker, dataset)) - - def make_one_shot_iterator(self): - iterators = [] - for worker, dataset in self._datasets: - with ops.device(worker): - iterators.append((worker, dataset_ops.make_one_shot_iterator(dataset))) - return MultiWorkerDataIterator(iterators, self._input_workers) - - def make_initializable_iterator(self): - iterators = [] - for worker, dataset in self._datasets: - with ops.device(worker): - iterators.append( - (worker, dataset_ops.make_initializable_iterator(dataset))) - return MultiWorkerDataIterator(iterators, self._input_workers) - - -class InputIterator(object): - """An input iterator, intended to be passed to `DistributionStrategy.run`.""" - - def get_next(self): - """Returns the next inputs for all replicas.""" - raise NotImplementedError("must be implemented in descendants") - - def initialize(self): - """Initialize the underlying input dataset, when applicable. 
- - In eager mode, this will create a new iterator and return it. - In graph mode, this will initialize the same underlying iterator(s). - - Users are required to call this if - - This iterator was returned from a call to `make_input_fn_iterator` with an - input function that returns a dataset. - - Or this iterator was returned from a call to `make_dataset_iterator`. - - Returns: - A list of initialization ops to be executed. - """ - raise NotImplementedError("must be implemented in descendants") - - -class InputIteratorImpl(InputIterator): - """Common implementation for all input iterators.""" - - def __init__(self, input_workers, iterators): - assert isinstance(input_workers, InputWorkers) - if not input_workers.worker_devices: - raise ValueError("Should have at least one worker for input iterator.") - - self._iterators = iterators - self._input_workers = input_workers - - def get_next(self, name=None): - """Returns the next input from the iterator for all replicas.""" - replicas = [] - for i, worker in enumerate(self._input_workers.worker_devices): - if name is not None: - d = tf_device.DeviceSpec.from_string(worker) - new_name = "%s_%s_%d" % (name, d.job, d.task) - else: - new_name = None - with ops.device(worker): - # Make `replicas` a flat list of values across all replicas. - replicas.extend(self._iterators[i].get_next_as_list(new_name)) - - return regroup(self._input_workers.device_map, replicas) - - def initialize(self): - """Initialze underlying iterators. - - Returns: - A list of any initializer ops that should be run. - """ - init_ops = [] - for it in self._iterators: - init_ops.extend(it.initialize()) - return init_ops - - # TODO(priyag): Remove when we switch to using `MultiDeviceIterator` for TPUs. - @property - def output_classes(self): - return self._iterators[0].output_classes - - # TODO(priyag): Remove when we switch to using `MultiDeviceIterator` for TPUs. 
- @property - def output_shapes(self): - return self._iterators[0].output_shapes - - # TODO(priyag): Remove when we switch to using `MultiDeviceIterator` for TPUs. - @property - def output_types(self): - return self._iterators[0].output_types - - # TODO(priyag): Remove when we switch to using `MultiDeviceIterator` for TPUs. - def get_iterator(self, worker): - for i, w in enumerate(self._input_workers.worker_devices): - if worker == w: - return self._iterators[i] - return None - - -class InputFunctionIterator(InputIteratorImpl): - """Iterator created from input function.""" - - def __init__(self, input_fn, input_workers, input_contexts): - """Make an iterator for input provided via an input function. - - Currently implements PER_WORKER mode, in which the `input_fn` is called - once on each worker. - - TODO(priyag): Add other replication modes. - TODO(priyag): Allow taking input function that returns a callable that - returns nest of tensors. - - Args: - input_fn: Input function that returns a `tf.data.Dataset` object. - input_workers: an `InputWorkers` object. - input_contexts: A list of `InputContext` instances to be passed to call(s) - to `input_fn`. Length and order should match worker order in - `worker_device_pairs`. 
- """ - assert isinstance(input_workers, InputWorkers) - if input_workers.num_workers != len(input_contexts): - raise ValueError( - "Number of input workers (%d) is not same as number of " - "input_contexts (%d)" % - (input_workers.num_workers, len(input_contexts))) - - iterators = [] - for i, ctx in enumerate(input_contexts): - worker = input_workers.worker_devices[i] - with ops.device(worker): - result = input_fn(ctx) - if not isinstance(result, dataset_ops.DatasetV2): - raise ValueError("input_fn must return a tf.data.Dataset.") - devices = input_workers.compute_devices_for_worker(i) - iterator = _SingleWorkerDatasetIterator(result, worker, devices) - iterators.append(iterator) - - super(InputFunctionIterator, self).__init__(input_workers, iterators) - - -class DatasetIterator(InputIteratorImpl): - """Iterator created from input dataset.""" - - def __init__(self, dataset, input_workers, split_batch_by=None): - """Make an iterator for the dataset on given devices. - - If `split_batch_by` is not None, we "split" each batch of the - dataset by `split_batch_by` value. To achieve this, we first unbatch the - input dataset and then rebatch it with the per replica batch size that is - calculated using `global_batch_size // split_batch_by`. - The currently supported datasets are as follows: - `dataset.batch()` is the last operation on the dataset OR - `dataset.apply(map_and_batch)` is the last operation on the dataset OR - `dataset.batch().prefetch()` are the last 2 operations on the dataset OR - `dataset.apply(map_and_batch).prefetch()` are the last 2 operations. - - TODO(priyag): Support multi worker / host cases properly by cloning - and sharding the dataset on each worker. Current setup will only work in - some cases, such as in-graph multi worker GPU case. If the input pipeline - has random shuffling (with a different seed on each worker), each worker - will see random input from the same overall dataset in each step. 
Otherwise, - each worker will see the same input in each step. - - Args: - dataset: `tf.data.Dataset` that will be used as the input source. - input_workers: an `InputWorkers` object. - split_batch_by: Optional integer. If present, we "split" each batch of the - dataset by `split_batch_by` value. - """ - assert isinstance(input_workers, InputWorkers) - if split_batch_by: - dataset = _split_dataset_batch(dataset, split_batch_by) - - iterators = [] - for i, worker in enumerate(input_workers.worker_devices): - with ops.device(worker): - worker_devices = input_workers.compute_devices_for_worker(i) - cloned_dataset = dataset - if not context.executing_eagerly(): - cloned_dataset = input_ops._clone_dataset(dataset) # pylint: disable=protected-access - iterator = _SingleWorkerDatasetIterator(cloned_dataset, worker, - worker_devices) - iterators.append(iterator) - - super(DatasetIterator, self).__init__(input_workers, iterators) - - -class _SingleWorkerDatasetIterator(object): - """Iterator for a single `tf.data.Dataset`.""" - - def __init__(self, dataset, worker, devices): - """Create iterator for the `dataset` to fetch data to worker's `devices` . - - `MultiDeviceIterator` is used to prefetch input to the devices on the - given worker. - - Args: - dataset: A `tf.data.Dataset` instance. - worker: Worker on which ops should be created. - devices: Distribute data from `dataset` to these devices. - """ - self._dataset = dataset - self._worker = worker - self._devices = devices - self._make_iterator() - - def _make_iterator(self): - """Make appropriate iterator on the dataset.""" - with ops.device(self._worker): - self._iterator = multi_device_iterator_ops.MultiDeviceIterator( - self._dataset, self._devices) - - def get_next_as_list(self, name=None): - """Get next element from the underlying iterator.""" - del name - with ops.device(self._worker): - data_list = self._iterator.get_next() - return data_list - - def initialize(self): - """Initialze underlying iterator. 
- - In eager execution, this simply recreates the underlying iterator. - In graph execution, it returns the initializer ops for the underlying - iterator. - - Returns: - A list of any initializer ops that should be run. - """ - if context.executing_eagerly(): - self._make_iterator() - return [] - else: - return [self._iterator.initializer] - - @property - def output_classes(self): - return self._iterator.output_classes - - @property - def output_shapes(self): - return self._iterator.output_shapes - - @property - def output_types(self): - return self._iterator.output_types - - -def _split_dataset_batch(dataset, split_batch_by): - """Divide a batch-ed dataset's batches into smaller batches.""" - # TODO(sourabhbajaj): Remove this in lieu of distributed datasets - # pylint: disable=protected-access - def _get_batch_dataset(d): - """Get the underlying batch dataset from the dataset object.""" - if isinstance(d, dataset_ops.DatasetV1Adapter): - d = d._dataset - - if isinstance(d, (dataset_ops.BatchDataset, batching._MapAndBatchDataset)): - return d - elif isinstance(d, dataset_ops.PrefetchDataset): - return _get_batch_dataset(d._input_dataset) - raise ValueError( - "Unable to get batched dataset from the input dataset. `batch` " - "`map_and_batch` need to be the last operations on the dataset. 
" - "The batch operations can be followed by a prefetch.") - - batched_dataset = _get_batch_dataset(dataset) - if isinstance(batched_dataset, dataset_ops.BatchDataset): - batch_size = batched_dataset._batch_size - drop_remainder = batched_dataset._drop_remainder - elif isinstance(batched_dataset, batching._MapAndBatchDataset): - batch_size = batched_dataset._batch_size_t - drop_remainder = batched_dataset._drop_remainder_t - - prefetch_buffer = None - if isinstance(dataset, dataset_ops.PrefetchDataset): - prefetch_buffer = dataset._buffer_size - elif (isinstance(dataset, dataset_ops.DatasetV1Adapter) - and isinstance(dataset._dataset, dataset_ops.PrefetchDataset)): - prefetch_buffer = dataset._dataset._buffer_size - # pylint: enable=protected-access - - if tensor_util.is_tensor(batch_size): - batch_size = tensor_util.constant_value(batch_size) - - if tensor_util.is_tensor(drop_remainder): - drop_remainder = tensor_util.constant_value(drop_remainder) - - if batch_size % split_batch_by: - raise ValueError( - "Batch size %s cannot be sharded evenly across replicas %s" % ( - batch_size, split_batch_by)) - new_batch_size = batch_size // split_batch_by - - dataset = dataset.apply(batching.unbatch()) - dataset = dataset.batch(new_batch_size, drop_remainder=drop_remainder) - if prefetch_buffer is not None: - dataset = dataset.prefetch(prefetch_buffer) - return dataset - - -class MultiStepContext(object): - """A context object that can be used to capture things when running steps. - - This context object is useful when running multiple steps at a time using the - `experimental_run_steps_on_iterator` API. For e.g. it allows the user's step - function to specify which outputs to emit at what frequency. Currently it - supports capturing output from the last step, as well as capturing non tensor - outputs. In the future it will be augmented to support other use cases such - as output each N steps. - """ - - def __init__(self): - """Initialize an output context. 
- - Returns: - A context object. - """ - self._last_step_outputs = {} - self._last_step_outputs_reduce_ops = {} - self._non_tensor_outputs = {} - - @property - def last_step_outputs(self): - """A dictionary consisting of outputs to be captured on last step. - - Keys in the dictionary are names of tensors to be captured, as specified - when `set_last_step_output` is called. - Values in the dictionary are the tensors themselves. If - `set_last_step_output` was called with a `reduce_op` for this output, - then the value is the reduced value. - - Returns: - A dictionary with last step outputs. - """ - return self._last_step_outputs - - def _set_last_step_outputs(self, outputs): - """Replace the entire dictionary of last step outputs.""" - if not isinstance(outputs, dict): - raise ValueError("Need a dictionary to set last_step_outputs.") - self._last_step_outputs = outputs - - def set_last_step_output(self, name, output, reduce_op=None): - """Set `output` with `name` to be outputted from the last step. - - Args: - name: String, name to identify the output. Doesn't need to match tensor - name. - output: The tensors that should be outputted with `name`. See below for - actual types supported. - reduce_op: Reduction method to use to reduce outputs from multiple - replicas. Required if `set_last_step_output` is called in a replica - context. Optional in cross_replica_context. - When present, the outputs from all the replicas are reduced using the - current distribution strategy's `reduce` method. Hence, the type of - `output` must be what's supported by the corresponding `reduce` method. - For e.g. if using MirroredStrategy and reduction is set, output - must be a `PerReplica` value. - The reduce method is also recorded in a dictionary - `_last_step_outputs_reduce_ops` for later interpreting of the - outputs as already reduced or not. 
- """ - if distribution_strategy_context.in_cross_replica_context(): - self._last_step_outputs_reduce_ops[name] = reduce_op - if reduce_op is None: - self._last_step_outputs[name] = output - else: - distribution = distribution_strategy_context.get_distribution_strategy() - self._last_step_outputs[name] = distribution.reduce(reduce_op, output) - else: - assert reduce_op is not None - def merge_fn(distribution, value): - self._last_step_outputs[name] = distribution.reduce(reduce_op, value) - # Setting this inside the `merge_fn` because all replicas share the same - # context object, so it's more robust to set it only once (even if all - # the replicas are trying to set the same value). - self._last_step_outputs_reduce_ops[name] = reduce_op - - distribution_strategy_context.get_replica_context().merge_call( - merge_fn, args=(output,)) - - @property - def non_tensor_outputs(self): - """A dictionary consisting of any non tensor outputs to be captured.""" - return self._non_tensor_outputs - - def set_non_tensor_output(self, name, output): - """Set `output` with `name` to be captured as a non tensor output.""" - if distribution_strategy_context.in_cross_replica_context(): - self._non_tensor_outputs[name] = output - else: - def merge_fn(distribution, value): - # NOTE(priyag): For non tensor outputs, we simply return all the values - # in a list as reduction doesn't make sense on non tensors. - self._non_tensor_outputs[name] = distribution.unwrap(value) - distribution_strategy_context.get_replica_context().merge_call( - merge_fn, args=(output,)) - - def value_container(val): """Returns the container that this per-replica `value` belongs to. -- GitLab From 9fb415c6eac3cd245de736e7ebc54094416c1013 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 30 Dec 2018 01:07:37 +0000 Subject: [PATCH 0080/1095] Fix TypeError when using tf.keras.utils.plot_model This fix fixes the issue raised in 24622 where a TypeError was raised when using tf.keras.utils.plot_model. 
This fix fixes 24622. Signed-off-by: Yong Tang --- tensorflow/python/keras/utils/vis_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/utils/vis_utils.py b/tensorflow/python/keras/utils/vis_utils.py index 82bc2755bd..fa848ae8a7 100644 --- a/tensorflow/python/keras/utils/vis_utils.py +++ b/tensorflow/python/keras/utils/vis_utils.py @@ -120,7 +120,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'): for i, node in enumerate(layer._inbound_nodes): node_key = layer.name + '_ib-' + str(i) if node_key in model._network_nodes: # pylint: disable=protected-access - for inbound_layer in node.inbound_layers: + for inbound_layer in nest.flatten(node.inbound_layers): inbound_layer_id = str(id(inbound_layer)) layer_id = str(id(layer)) dot.add_edge(pydot.Edge(inbound_layer_id, layer_id)) -- GitLab From 129bf6c79a6793951d0110d31b1b2c0198479fff Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 30 Dec 2018 01:09:11 +0000 Subject: [PATCH 0081/1095] Add missing python import Signed-off-by: Yong Tang --- tensorflow/python/keras/utils/vis_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/keras/utils/vis_utils.py b/tensorflow/python/keras/utils/vis_utils.py index fa848ae8a7..c7c45f381e 100644 --- a/tensorflow/python/keras/utils/vis_utils.py +++ b/tensorflow/python/keras/utils/vis_utils.py @@ -67,6 +67,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'): """ from tensorflow.python.keras.layers.wrappers import Wrapper from tensorflow.python.keras.models import Sequential + from tensorflow.python.util import nest _check_pydot() dot = pydot.Dot() -- GitLab From 3849a1b682717226ce011718ea382be9a24b4b0c Mon Sep 17 00:00:00 2001 From: manhyuk Date: Sun, 30 Dec 2018 15:33:29 +0900 Subject: [PATCH 0082/1095] fix typo --- tensorflow/core/framework/model.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/framework/model.h 
b/tensorflow/core/framework/model.h index bb1d7b6bff..d9f7185362 100644 --- a/tensorflow/core/framework/model.h +++ b/tensorflow/core/framework/model.h @@ -310,7 +310,7 @@ class Node { std::map> parameters_ GUARDED_BY(mu_); std::list> inputs_ GUARDED_BY(mu_); - // The reference to the output node is not owned so that that deletion of a + // The reference to the output node is not owned so that deletion of a // node results in recursive deletion of the subtree rooted in the node. Node* const output_; }; -- GitLab From a97dbfcf9b727a9eeec81e4a19ecf56a36e84a0c Mon Sep 17 00:00:00 2001 From: manhyuk Date: Sun, 30 Dec 2018 15:33:39 +0900 Subject: [PATCH 0083/1095] fix typo --- tensorflow/core/kernels/cudnn_rnn_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/cudnn_rnn_ops.cc b/tensorflow/core/kernels/cudnn_rnn_ops.cc index d37f5fb9da..196494cbcf 100644 --- a/tensorflow/core/kernels/cudnn_rnn_ops.cc +++ b/tensorflow/core/kernels/cudnn_rnn_ops.cc @@ -743,7 +743,7 @@ Status DoBackward( /* forward inputs */ const Tensor* input, const Tensor* input_h, const Tensor* input_c, const Tensor* params, - /* forward outptus */ + /* forward outputs */ const Tensor* output, const Tensor* output_h, const Tensor* output_c, /* backprop inputs */ const Tensor* output_backprop, const Tensor* output_h_backprop, -- GitLab From bbc8c15b85358b9df029f16e31bc663b8f9d1fef Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 30 Dec 2019 01:02:39 -0800 Subject: [PATCH 0084/1095] compat: Update forward compatibility horizon to 2018-12-30 PiperOrigin-RevId: 227267901 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 3a9b3b3838..c41e58a823 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 29) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 30) @tf_export("compat.forward_compatible") -- GitLab From 53e677de105c2edbfdc2d86ceb31d06765b08512 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 30 Dec 2018 10:07:06 +0000 Subject: [PATCH 0085/1095] Fix incorrect display of keras model summary This fix fixes the issue raised in 24627 where the display of keras model summary is incorrect (regression from 1.12). The reason was that `layer.name` (vs. layer object itself) should be used, This fix fixes 24627. 
Signed-off-by: Yong Tang --- tensorflow/python/keras/utils/layer_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/utils/layer_utils.py b/tensorflow/python/keras/utils/layer_utils.py index ead5afd1ae..1d85e8a25f 100644 --- a/tensorflow/python/keras/utils/layer_utils.py +++ b/tensorflow/python/keras/utils/layer_utils.py @@ -196,7 +196,7 @@ def print_summary(model, line_length=None, positions=None, print_fn=None): continue for inbound_layer, node_index, tensor_index, _ in node.iterate_inbound(): - connections.append('{}[{}][{}]'.format(inbound_layer, node_index, + connections.append('{}[{}][{}]'.format(inbound_layer.name, node_index, tensor_index)) name = layer.name -- GitLab From c985bd0dce0f8a7ccf334c9782d051c81ad00f1d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 30 Dec 2019 02:54:20 -0800 Subject: [PATCH 0086/1095] Standardize some names from `TFGAN` -> `TF-GAN`. A noop. PiperOrigin-RevId: 227273663 --- tensorflow/contrib/gan/BUILD | 2 +- .../estimator/python/gan_estimator_impl.py | 16 +++++++------- .../gan/python/losses/python/losses_impl.py | 4 ++-- .../python/losses/python/tuple_losses_impl.py | 2 +- tensorflow/contrib/gan/python/namedtuples.py | 6 ++--- tensorflow/contrib/gan/python/train.py | 22 ++++++++++--------- 6 files changed, 27 insertions(+), 25 deletions(-) diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index ae8320cfb2..e1d198fe7b 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -1,4 +1,4 @@ -# Files for using TFGAN framework. +# Files for using TF-GAN framework. 
load("//tensorflow:tensorflow.bzl", "py_test") package(default_visibility = [ diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py index adb7222821..dd904611d1 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A TFGAN-backed GAN Estimator.""" +"""A TF-GAN-backed GAN Estimator.""" from __future__ import absolute_import from __future__ import division @@ -56,10 +56,10 @@ _summary_type_map = { class GANEstimator(estimator.Estimator): """An estimator for Generative Adversarial Networks (GANs). - This Estimator is backed by TFGAN. The network functions follow the TFGAN API - except for one exception: if either `generator_fn` or `discriminator_fn` have - an argument called `mode`, then the tf.Estimator mode is passed in for that - argument. This helps with operations like batch normalization, which have + This Estimator is backed by TF-GAN. The network functions follow the TF-GAN + API except for one exception: if either `generator_fn` or `discriminator_fn` + have an argument called `mode`, then the tf.Estimator mode is passed in for + that argument. This helps with operations like batch normalization, which have different train and evaluation behavior. Example: @@ -68,7 +68,7 @@ class GANEstimator(estimator.Estimator): import tensorflow as tf tfgan = tf.contrib.gan - # See TFGAN's `train.py` for a description of the generator and + # See TF-GAN's `train.py` for a description of the generator and # discriminator API. def generator_fn(generator_inputs): ... @@ -123,13 +123,13 @@ class GANEstimator(estimator.Estimator): to continue training a previously saved model. 
generator_fn: A python function that takes a Tensor, Tensor list, or Tensor dictionary as inputs and returns the outputs of the GAN - generator. See `TFGAN` for more details and examples. Additionally, if + generator. See `TF-GAN` for more details and examples. Additionally, if it has an argument called `mode`, the Estimator's `mode` will be passed in (ex TRAIN, EVAL, PREDICT). This is useful for things like batch normalization. discriminator_fn: A python function that takes the output of `generator_fn` or real data in the GAN setup, and `generator_inputs`. - Outputs a Tensor in the range [-inf, inf]. See `TFGAN` for more details + Outputs a Tensor in the range [-inf, inf]. See `TF-GAN` for more details and examples. generator_loss_fn: The loss function on the generator. Takes a `GANModel` tuple. diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl.py b/tensorflow/contrib/gan/python/losses/python/losses_impl.py index a0a86c6337..930c91786f 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl.py @@ -28,7 +28,7 @@ wasserstein_gradient_penalty All losses must be able to accept 1D or 2D Tensors, so as to be compatible with patchGAN style losses (https://arxiv.org/abs/1611.07004). -To make these losses usable in the TFGAN framework, please create a tuple +To make these losses usable in the TF-GAN framework, please create a tuple version of the losses with `losses_utils.py`. """ @@ -320,7 +320,7 @@ def wasserstein_gradient_penalty( generated_data: Output of the generator. generator_inputs: Exact argument to pass to the generator, which is used as optional conditioning to the discriminator. - discriminator_fn: A discriminator function that conforms to TFGAN API. + discriminator_fn: A discriminator function that conforms to TF-GAN API. discriminator_scope: If not `None`, reuse discriminators from this scope. 
epsilon: A small positive number added for numerical stability when computing the gradient norm. diff --git a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py index 221c70c38b..76e57df7f6 100644 --- a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py +++ b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TFGAN utilities for loss functions that accept GANModel namedtuples. +"""TF-GAN utilities for loss functions that accept GANModel namedtuples. The losses and penalties in this file all correspond to losses in `losses_impl.py`. Losses in that file take individual arguments, whereas in this diff --git a/tensorflow/contrib/gan/python/namedtuples.py b/tensorflow/contrib/gan/python/namedtuples.py index 969b68449d..73dfee4fde 100644 --- a/tensorflow/contrib/gan/python/namedtuples.py +++ b/tensorflow/contrib/gan/python/namedtuples.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Named tuples for TFGAN. +"""Named tuples for TF-GAN. -TFGAN training occurs in four steps, and each step communicates with the next -step via one of these named tuples. At each step, you can either use a TFGAN +TF-GAN training occurs in four steps, and each step communicates with the next +step via one of these named tuples. At each step, you can either use a TF-GAN helper function in `train.py`, or you can manually construct a tuple. 
""" diff --git a/tensorflow/contrib/gan/python/train.py b/tensorflow/contrib/gan/python/train.py index 4c7bee41b3..f36a5d346e 100644 --- a/tensorflow/contrib/gan/python/train.py +++ b/tensorflow/contrib/gan/python/train.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""The TFGAN project provides a lightweight GAN training/testing framework. +"""The TF-GAN project provides a lightweight GAN training/testing framework. This file contains the core helper functions to create and train a GAN model. See the README or examples in `tensorflow_models` for details on how to use. -TFGAN training occurs in four steps: +TF-GAN training occurs in four steps: 1) Create a model 2) Add a loss 3) Create train ops @@ -645,9 +645,10 @@ def gan_loss( type(model)) # Optionally create pooled model. - pooled_model = ( - _tensor_pool_adjusted_model(model, tensor_pool_fn) - if tensor_pool_fn else model) + if tensor_pool_fn: + pooled_model = _tensor_pool_adjusted_model(model, tensor_pool_fn) + else: + pooled_model = model # Create standard losses. 
gen_loss = generator_loss_fn(model, add_summaries=add_summaries) @@ -665,10 +666,11 @@ def gan_loss( if _use_aux_loss(mutual_information_penalty_weight): gen_info_loss = tfgan_losses.mutual_information_penalty( model, add_summaries=add_summaries) - dis_info_loss = ( - gen_info_loss - if tensor_pool_fn is None else tfgan_losses.mutual_information_penalty( - pooled_model, add_summaries=add_summaries)) + if tensor_pool_fn is None: + dis_info_loss = gen_info_loss + else: + dis_info_loss = tfgan_losses.mutual_information_penalty( + pooled_model, add_summaries=add_summaries) gen_loss += mutual_information_penalty_weight * gen_info_loss dis_loss += mutual_information_penalty_weight * dis_info_loss if _use_aux_loss(aux_cond_generator_weight): @@ -929,7 +931,7 @@ def gan_train_ops( **kwargs): """Returns GAN train ops. - The highest-level call in TFGAN. It is composed of functions that can also + The highest-level call in TF-GAN. It is composed of functions that can also be called, should a user require more control over some part of the GAN training process. -- GitLab From cb86c83b06bdb0ad04d8c383515fbab5999dcfdc Mon Sep 17 00:00:00 2001 From: David Norman Date: Sun, 30 Dec 2018 12:04:13 +0000 Subject: [PATCH 0087/1095] Allow the reducewindow -> reduce optimization to be disabled --- .../xla/service/algebraic_simplifier.cc | 86 ++++++++++--------- .../xla/service/algebraic_simplifier.h | 13 +++ 2 files changed, 57 insertions(+), 42 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index cad70a8d10..137e86216f 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2595,51 +2595,53 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( function)); } - // A reduce window can be expressed as a reduce and a reshape if all - // dimensions either have a window size of one or the entire dimension. 
If - // there is no stride, dilation, or padding, this is as easy as checking the - // size of the output shape and window dimension. - // - // The reshape is a bitcast since it adds one-sized dimensions. Often these - // ones are immediately removed as well with another reshape. The - // implementation of reduce tends to be slightly more efficient at reducing - // entire dimensions compared to reduce window. - auto effective_reduce_dims = [&] { - if (window_util::HasStride(window) || window_util::HasDilation(window) || - window_util::HasPadding(window)) { - return absl::InlinedVector{}; - } - absl::InlinedVector reduce_dims; - for (int64 i = 0; i < window.dimensions_size(); ++i) { - if (window.dimensions(i).size() == 1) { - continue; - } else if (reduce_window->shape().dimensions(i) == 1) { - reduce_dims.push_back(i); - } else { + if (options_.enable_window_reduce_to_reduce_replacement()) { + // A reduce window can be expressed as a reduce and a reshape if all + // dimensions either have a window size of one or the entire dimension. If + // there is no stride, dilation, or padding, this is as easy as checking the + // size of the output shape and window dimension. + // + // The reshape is a bitcast since it adds one-sized dimensions. Often these + // ones are immediately removed as well with another reshape. The + // implementation of reduce tends to be slightly more efficient at reducing + // entire dimensions compared to reduce window. 
+ auto effective_reduce_dims = [&] { + if (window_util::HasStride(window) || window_util::HasDilation(window) || + window_util::HasPadding(window)) { return absl::InlinedVector{}; } - } - return reduce_dims; - }(); + absl::InlinedVector reduce_dims; + for (int64 i = 0; i < window.dimensions_size(); ++i) { + if (window.dimensions(i).size() == 1) { + continue; + } else if (reduce_window->shape().dimensions(i) == 1) { + reduce_dims.push_back(i); + } else { + return absl::InlinedVector{}; + } + } + return reduce_dims; + }(); - // If a reduce window can be expressed as a reduce, do so and reshape the - // output. - if (!effective_reduce_dims.empty()) { - Shape reduce_shape = ShapeUtil::FilterDimensions( - [&](int64 dim) { - return !absl::c_linear_search(effective_reduce_dims, dim); - }, - reduce_window->shape()); - HloInstruction* reduce = - computation_->AddInstruction(HloInstruction::CreateReduce( - /*shape=*/reduce_shape, - /*operand=*/operand, - /*init_value=*/reduce_window->mutable_operand(1), - /*dimensions_to_reduce=*/effective_reduce_dims, - /*reduce_computation=*/function)); - return ReplaceWithNewInstruction( - reduce_window, - HloInstruction::CreateReshape(reduce_window->shape(), reduce)); + // If a reduce window can be expressed as a reduce, do so and reshape the + // output. + if (!effective_reduce_dims.empty()) { + Shape reduce_shape = ShapeUtil::FilterDimensions( + [&](int64 dim) { + return !absl::c_linear_search(effective_reduce_dims, dim); + }, + reduce_window->shape()); + HloInstruction* reduce = + computation_->AddInstruction(HloInstruction::CreateReduce( + /*shape=*/reduce_shape, + /*operand=*/operand, + /*init_value=*/reduce_window->mutable_operand(1), + /*dimensions_to_reduce=*/effective_reduce_dims, + /*reduce_computation=*/function)); + return ReplaceWithNewInstruction( + reduce_window, + HloInstruction::CreateReshape(reduce_window->shape(), reduce)); + } } // This optimization folds a pad op into reduce_window. 
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.h b/tensorflow/compiler/xla/service/algebraic_simplifier.h index d2775b9faf..695fc3af3e 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.h +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.h @@ -75,12 +75,25 @@ class AlgebraicSimplifierOptions { return enable_permutation_sort_replacement_; } + // If enable_window_reduce_to_reduce_replacement is true, the kReduceWindow + // instruction can be optimized by replacement with simpler operations. + void set_enable_window_reduce_to_reduce_replacement( + bool enable_window_reduce_to_reduce_replacement) { + enable_window_reduce_to_reduce_replacement_ = + enable_window_reduce_to_reduce_replacement; + } + bool enable_window_reduce_to_reduce_replacement() const { + return enable_window_reduce_to_reduce_replacement_; + } + + private: ValidBitcastCallback valid_bitcast_callback_; bool is_layout_sensitive_{false}; bool enable_dot_strength_reduction_{true}; bool enable_conv_simplification_{true}; bool enable_permutation_sort_replacement_{false}; + bool enable_window_reduce_to_reduce_replacement_{true}; }; // A pass which performs algebraic simplifications. -- GitLab From 44bb7adf1d04f090817f008fc47e4d81a23b1049 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 30 Dec 2019 06:18:15 -0800 Subject: [PATCH 0088/1095] [tfgan/tpu] Add loss summaries during TPUGANEstimator training if not on TPU.
PiperOrigin-RevId: 227283183 --- .../gan/python/estimator/python/tpu_gan_estimator_impl.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_impl.py index 295d1382e2..bf51b7fc45 100644 --- a/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_impl.py @@ -294,9 +294,10 @@ def _get_estimator_spec( gan_model, gan_loss, gan_loss_no_reduction, get_eval_metric_ops_fn) else: # model_fn_lib.ModeKeys.TRAIN: gan_loss = tfgan_tuples.GANLoss( - generator_loss=generator_loss_fn(gan_model, add_summaries=False), + generator_loss=generator_loss_fn( + gan_model, add_summaries=not is_on_tpu), discriminator_loss=discriminator_loss_fn( - gan_model, add_summaries=False)) + gan_model, add_summaries=not is_on_tpu)) # Construct optimizers if arguments were callable. For TPUs, they must be # `CrossShardOptimizer`. -- GitLab From 8f60a381d210478f21762a6cf14f547a05e98878 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Mon, 30 Dec 2019 09:31:02 -0800 Subject: [PATCH 0089/1095] Replace py_test with tf_py_test in more BUILD files. This will allow some easier refactoring for python test dependencies. Also, looking at impl, I feel like there is almost no case in which using tensorflow.bzl:py_test is preferable to tensorflow.bzl:tf_py_test. 
PiperOrigin-RevId: 227291326 --- tensorflow/python/autograph/impl/BUILD | 16 +- tensorflow/python/eager/BUILD | 29 +- tensorflow/python/feature_column/BUILD | 38 +- tensorflow/python/keras/BUILD | 541 ++++++++---------- tensorflow/python/saved_model/BUILD | 53 +- .../python/training/checkpointable/BUILD | 39 +- 6 files changed, 320 insertions(+), 396 deletions(-) diff --git a/tensorflow/python/autograph/impl/BUILD b/tensorflow/python/autograph/impl/BUILD index 201a888754..66f7915696 100644 --- a/tensorflow/python/autograph/impl/BUILD +++ b/tensorflow/python/autograph/impl/BUILD @@ -1,6 +1,6 @@ licenses(["notice"]) # Apache 2.0 -load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "tf_py_test") filegroup( name = "all_files", @@ -37,25 +37,23 @@ py_library( ], ) -py_test( +tf_py_test( name = "api_test", srcs = ["api_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":impl", + "//third_party/py/numpy", "//tensorflow/python:client_testlib", "//tensorflow/python/autograph/utils", - "//third_party/py/numpy", ], ) -py_test( +tf_py_test( name = "conversion_test", srcs = ["conversion_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":impl", - "//tensorflow/python:client_testlib", "@gast_archive//:gast", + "//tensorflow/python:client_testlib", ], ) diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index cd5c0be283..1f1cb22d58 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -1,6 +1,6 @@ licenses(["notice"]) # Apache 2.0 -load("//tensorflow:tensorflow.bzl", "py_test", "tf_cc_binary") +load("//tensorflow:tensorflow.bzl", "tf_py_test", "tf_cc_binary") load("//tensorflow:tensorflow.bzl", "cuda_py_test") load( "//tensorflow/tools/test:performance.bzl", @@ -255,11 +255,10 @@ py_library( ], ) -py_test( +tf_py_test( name = "execution_callbacks_test", srcs = ["execution_callbacks_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ 
":execution_callbacks", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_ops", @@ -373,11 +372,10 @@ tf_py_logged_benchmark( target = "//tensorflow/python/eager:benchmarks_test", ) -py_test( +tf_py_test( name = "tape_test", srcs = ["tape_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":backprop", ":context", ":test", @@ -414,20 +412,19 @@ cuda_py_test( ], ) -py_test( +tf_py_test( name = "pywrap_tfe_test", srcs = ["pywrap_tfe_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":backprop", ":context", ":core", ":test", + "//third_party/py/numpy", "//tensorflow/python:framework_test_lib", "//tensorflow/python:math_ops", "//tensorflow/python:pywrap_tensorflow", "//tensorflow/python:random_ops", - "//third_party/py/numpy", ], ) @@ -491,11 +488,10 @@ py_library( ], ) -py_test( +tf_py_test( name = "def_function_test", srcs = ["def_function_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":def_function", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", @@ -519,11 +515,10 @@ py_library( ], ) -py_test( +tf_py_test( name = "wrap_function_test", srcs = ["wrap_function_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":wrap_function", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_ops", diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index d24a7ae80c..2b986348b7 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -4,7 +4,7 @@ package( licenses(["notice"]) # Apache 2.0 -load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "tf_py_test") py_library( name = "feature_column_py", @@ -94,19 +94,13 @@ filegroup( ], ) -py_test( +tf_py_test( name = "feature_column_test", srcs = ["feature_column_test.py"], - data = [":vocabulary_testdata"], - srcs_version = "PY2AND3", - tags = [ - "no_cuda_on_cpu_tap", - 
"no_pip", - "no_windows", - ], - deps = [ + additional_deps = [ ":feature_column", ":feature_column_py", + "//third_party/py/numpy", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -125,24 +119,22 @@ py_test( "//tensorflow/python:variables", "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", - "//third_party/py/numpy", ], -) - -py_test( - name = "feature_column_v2_test", - srcs = ["feature_column_v2_test.py"], data = [":vocabulary_testdata"], - shard_count = 5, - srcs_version = "PY2AND3", tags = [ "no_cuda_on_cpu_tap", "no_pip", "no_windows", ], - deps = [ +) + +tf_py_test( + name = "feature_column_v2_test", + srcs = ["feature_column_v2_test.py"], + additional_deps = [ ":feature_column_py", ":feature_column_v2", + "//third_party/py/numpy", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", @@ -162,6 +154,12 @@ py_test( "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", "//tensorflow/python/estimator:estimator_py", - "//third_party/py/numpy", + ], + data = [":vocabulary_testdata"], + shard_count = 5, + tags = [ + "no_cuda_on_cpu_tap", + "no_pip", + "no_windows", ], ) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index d833ae0fa2..914568f652 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -7,7 +7,7 @@ package(default_visibility = ["//visibility:public"]) exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "tf_py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_test") config_setting( @@ -304,180 +304,167 @@ py_library( ], ) -py_test( +tf_py_test( name = "integration_test", size = "medium", srcs = ["integration_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", 
"//tensorflow/python:client_testlib", "//tensorflow/python:layers", "//tensorflow/python:nn", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", ], + tags = ["notsan"], ) -py_test( +tf_py_test( name = "activations_test", size = "small", srcs = ["activations_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "constraints_test", size = "small", srcs = ["constraints_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "initializers_test", size = "small", srcs = ["initializers_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", "//tensorflow/python:client_testlib", "//tensorflow/python:init_ops", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", ], ) -py_test( +tf_py_test( name = "regularizers_test", size = "small", srcs = ["regularizers_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", "@absl_py//absl/testing:parameterized", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "optimizers_test", size = "medium", srcs = ["optimizers_test.py"], - shard_count = 2, - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", "//tensorflow/python:client_testlib", "//tensorflow/python:training", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", ], + 
shard_count = 2, + tags = ["notsan"], ) -py_test( +tf_py_test( name = "losses_test", size = "small", srcs = ["losses_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "metrics_functional_test", size = "medium", srcs = ["metrics_functional_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "metrics_test", size = "medium", srcs = ["metrics_test.py"], - shard_count = 4, - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 4, ) -py_test( +tf_py_test( name = "applications_test", size = "enormous", srcs = ["applications/applications_test.py"], - shard_count = 2, - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", "@absl_py//absl/testing:parameterized", + "//tensorflow/python:client_testlib", ], + shard_count = 2, ) -py_test( +tf_py_test( name = "advanced_activations_test", size = "medium", srcs = ["layers/advanced_activations_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", "@absl_py//absl/testing:parameterized", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "convolutional_recurrent_test", size = "large", srcs = ["layers/convolutional_recurrent_test.py"], - shard_count = 2, - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - 
"//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 2, ) -py_test( +tf_py_test( name = "convolutional_test", size = "large", srcs = ["layers/convolutional_test.py"], - shard_count = 11, - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 11, ) cuda_py_test( @@ -494,30 +481,28 @@ cuda_py_test( tags = ["no_windows_gpu"], ) -py_test( +tf_py_test( name = "pooling_test", size = "large", srcs = ["layers/pooling_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", "@absl_py//absl/testing:parameterized", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "core_test", size = "medium", srcs = ["layers/core_test.py"], - shard_count = 3, - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 3, ) cuda_py_test( @@ -531,121 +516,113 @@ cuda_py_test( ], ) -py_test( +tf_py_test( name = "local_test", size = "medium", srcs = ["layers/local_test.py"], - shard_count = 2, - srcs_version = "PY2AND3", - tags = ["no_windows"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 2, + tags = ["no_windows"], ) -py_test( +tf_py_test( name = "merge_test", size = "small", srcs = ["layers/merge_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - 
"//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "noise_test", size = "small", srcs = ["layers/noise_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", "@absl_py//absl/testing:parameterized", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "normalization_test", size = "medium", srcs = ["layers/normalization_test.py"], - shard_count = 3, - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 3, + tags = ["notsan"], ) -py_test( +tf_py_test( name = "simplernn_test", size = "medium", srcs = ["layers/simplernn_test.py"], - shard_count = 4, - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 4, + tags = ["notsan"], ) -py_test( +tf_py_test( name = "gru_test", size = "large", srcs = ["layers/gru_test.py"], - shard_count = 2, - srcs_version = "PY2AND3", - tags = ["notsan"], # http://b/62136390 - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 2, + tags = ["notsan"], # http://b/62136390 ) -py_test( +tf_py_test( name = "lstm_test", size = "medium", srcs = ["layers/lstm_test.py"], + additional_deps = [ + ":keras", + "@absl_py//absl/testing:parameterized", + 
"//third_party/py/numpy", + "//tensorflow/python:client_testlib", + ], shard_count = 4, - srcs_version = "PY2AND3", tags = [ "noasan", # times out b/63678675 "notsan", # http://b/62189182 ], - deps = [ - ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], ) -py_test( +tf_py_test( name = "recurrent_test", size = "medium", srcs = ["layers/recurrent_test.py"], - shard_count = 4, - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 4, ) cuda_py_test( @@ -674,55 +651,57 @@ cuda_py_test( shard_count = 6, ) -py_test( +tf_py_test( name = "serialization_test", size = "small", srcs = ["layers/serialization_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", "@absl_py//absl/testing:parameterized", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "wrappers_test", size = "medium", srcs = ["layers/wrappers_test.py"], + additional_deps = [ + ":keras", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + ], shard_count = 4, - srcs_version = "PY2AND3", tags = [ "noasan", # http://b/78599823 "notsan", ], - deps = [ - ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], ) -py_test( +tf_py_test( name = "scikit_learn_test", size = "small", srcs = ["wrappers/scikit_learn_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + tags = ["notsan"], ) -py_test( 
+tf_py_test( name = "data_utils_test", size = "large", srcs = ["utils/data_utils_test.py"], - srcs_version = "PY2AND3", + additional_deps = [ + ":keras", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + ], tags = [ "no_oss", "no_windows", @@ -730,64 +709,54 @@ py_test( "notsan", "optonly", # times out ], - deps = [ - ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], ) -py_test( +tf_py_test( name = "generic_utils_test", size = "small", srcs = ["utils/generic_utils_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", "@absl_py//absl/testing:parameterized", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "tf_utils_test", size = "small", srcs = ["utils/tf_utils_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "io_utils_test", size = "small", srcs = ["utils/io_utils_test.py"], - srcs_version = "PY2AND3", + additional_deps = [ + ":keras", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + ], tags = [ "no_windows", # TODO: needs investigation on Windows "notsan", ], - deps = [ - ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], ) -py_test( +tf_py_test( name = "np_utils_test", size = "small", srcs = ["utils/np_utils_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) @@ -818,287 +787,267 @@ cuda_py_test( ], ) -py_test( +tf_py_test( name = "conv_utils_test", size = "small", srcs = 
["utils/conv_utils_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "image_test", size = "medium", srcs = ["preprocessing/image_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "sequence_test", size = "small", srcs = ["preprocessing/sequence_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "text_test", size = "small", srcs = ["preprocessing/text_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "callbacks_test", size = "medium", srcs = ["callbacks_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + tags = ["notsan"], ) -py_test( +tf_py_test( name = "correctness_test", size = "medium", srcs = ["engine/correctness_test.py"], - shard_count = 2, - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - 
"//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 2, + tags = ["notsan"], ) -py_test( +tf_py_test( name = "training_test", size = "medium", srcs = ["engine/training_test.py"], - shard_count = 16, - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 16, + tags = ["notsan"], ) -py_test( +tf_py_test( name = "training_dataset_test", size = "medium", srcs = ["engine/training_dataset_test.py"], - shard_count = 4, - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 4, ) -py_test( +tf_py_test( name = "training_generator_test", size = "large", srcs = ["engine/training_generator_test.py"], + additional_deps = [ + ":keras", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + ], shard_count = 3, - srcs_version = "PY2AND3", tags = [ "no_oss", "notsan", ], - deps = [ - ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", - ], ) -py_test( +tf_py_test( name = "feature_columns_integration_test", size = "small", srcs = ["engine/feature_columns_integration_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", "//tensorflow/python:client_testlib", "//tensorflow/python/feature_column:feature_column_py", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", ], + tags = ["notsan"], ) 
-py_test( +tf_py_test( name = "training_eager_test", size = "medium", srcs = ["engine/training_eager_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + tags = ["notsan"], ) -py_test( +tf_py_test( name = "training_utils_test", size = "medium", srcs = ["engine/training_utils_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + tags = ["notsan"], ) -py_test( +tf_py_test( name = "model_subclassing_test", size = "medium", srcs = ["model_subclassing_test.py"], - shard_count = 4, - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + shard_count = 4, + tags = ["notsan"], ) -py_test( +tf_py_test( name = "topology_test", size = "medium", srcs = ["engine/topology_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no-internal-py3", - ], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + ], + tags = [ + "no-internal-py3", ], ) -py_test( +tf_py_test( name = "base_layer_test", size = "small", srcs = ["engine/base_layer_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + 
"//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "saving_test", size = "medium", srcs = ["engine/saving_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "sequential_test", size = "medium", srcs = ["engine/sequential_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], ) -py_test( +tf_py_test( name = "models_test", size = "medium", srcs = ["models_test.py"], - shard_count = 2, - srcs_version = "PY2AND3", - tags = ["notsan"], # b/67509773 - deps = [ + additional_deps = [ ":keras", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", "//tensorflow/python:client_testlib", "//tensorflow/python:training", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", ], + shard_count = 2, + tags = ["notsan"], # b/67509773 ) -py_test( +tf_py_test( name = "backend_test", size = "medium", srcs = ["backend_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":keras", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", "//tensorflow/python:client_testlib", "//tensorflow/python:util", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", ], ) -py_test( +tf_py_test( name = "keras_parameterized_test", size = "small", srcs = ["keras_parameterized_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], - deps = [ + additional_deps = [ ":keras", - "//tensorflow/python:client_testlib", - "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", ], + tags = 
["notsan"], ) diff --git a/tensorflow/python/saved_model/BUILD b/tensorflow/python/saved_model/BUILD index 40d7e2f25e..8da6ff5142 100644 --- a/tensorflow/python/saved_model/BUILD +++ b/tensorflow/python/saved_model/BUILD @@ -11,7 +11,7 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "tf_py_test") load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_all_protos") @@ -98,17 +98,16 @@ py_library( ], ) -py_test( +tf_py_test( name = "loader_test", size = "small", srcs = ["loader_test.py"], - srcs_version = "PY2AND3", - visibility = ["//visibility:private"], - deps = [ + additional_deps = [ ":builder", ":loader", ":signature_def_utils", ":utils", + "@absl_py//absl/testing:parameterized", "//tensorflow/python:client", "//tensorflow/python:client_testlib", "//tensorflow/python:control_flow_ops", @@ -118,7 +117,6 @@ py_test( "//tensorflow/python:state_ops", "//tensorflow/python:training", "//tensorflow/python:variables", - "@absl_py//absl/testing:parameterized", ], ) @@ -155,15 +153,11 @@ py_library( ], ) -py_test( +tf_py_test( name = "saved_model_test", size = "small", srcs = ["saved_model_test.py"], - data = ["//tensorflow/cc/saved_model:saved_model_half_plus_two"], - srcs_version = "PY2AND3", - tags = ["no_windows"], - visibility = ["//visibility:private"], - deps = [ + additional_deps = [ ":builder", ":constants", ":loader", @@ -186,6 +180,8 @@ py_test( "//tensorflow/python:util", "//tensorflow/python:variables", ], + data = ["//tensorflow/cc/saved_model:saved_model_half_plus_two"], + tags = ["no_windows"], ) py_library( @@ -205,13 +201,11 @@ py_library( ], ) -py_test( +tf_py_test( name = "utils_test", size = "small", srcs = ["utils_test.py"], - srcs_version = "PY2AND3", - visibility = ["//visibility:private"], - deps = [ + additional_deps = [ ":utils", 
"//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -237,13 +231,11 @@ py_library( ], ) -py_test( +tf_py_test( name = "signature_def_utils_test", size = "small", srcs = ["signature_def_utils_test.py"], - srcs_version = "PY2AND3", - visibility = ["//visibility:private"], - deps = [ + additional_deps = [ ":signature_constants", ":signature_def_utils", ":utils", @@ -254,12 +246,11 @@ py_test( ], ) -py_test( +tf_py_test( name = "simple_save_test", size = "small", srcs = ["simple_save_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":loader", ":signature_constants", ":simple_save", @@ -305,18 +296,17 @@ py_library( ], ) -py_test( +tf_py_test( name = "save_test", srcs = ["save_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":loader", ":save", ":signature_constants", ":tag_constants", + "@absl_py//absl/testing:parameterized", "//tensorflow/python/eager:def_function", "//tensorflow/python/eager:test", - "@absl_py//absl/testing:parameterized", ], ) @@ -339,11 +329,10 @@ py_library( ], ) -py_test( +tf_py_test( name = "load_test", srcs = ["load_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":load", ":save", "//tensorflow/python:constant_op", @@ -408,10 +397,10 @@ py_library( ], ) -py_test( +tf_py_test( name = "nested_structure_coder_test", srcs = ["nested_structure_coder_test.py"], - deps = [ + additional_deps = [ ":nested_structure_coder", ":struct_py", "//tensorflow/python:framework", diff --git a/tensorflow/python/training/checkpointable/BUILD b/tensorflow/python/training/checkpointable/BUILD index 595ce2a0da..855dc4fb68 100644 --- a/tensorflow/python/training/checkpointable/BUILD +++ b/tensorflow/python/training/checkpointable/BUILD @@ -11,7 +11,7 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "tf_py_test") load("//tensorflow/compiler/tests:build_defs.bzl", "tf_xla_py_test") 
py_library( @@ -32,11 +32,10 @@ py_library( ], ) -py_test( +tf_py_test( name = "base_test", srcs = ["base_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":base", "//tensorflow/python:client_testlib", ], @@ -52,11 +51,10 @@ py_library( ], ) -py_test( +tf_py_test( name = "tracking_test", srcs = ["tracking_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":base", ":tracking", "//tensorflow/python:client_testlib", @@ -79,11 +77,10 @@ py_library( ], ) -py_test( +tf_py_test( name = "data_structures_test", srcs = ["data_structures_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":data_structures", "//tensorflow/python:array_ops", "//tensorflow/python:framework_test_lib", @@ -129,15 +126,15 @@ py_library( ], ) -py_test( +tf_py_test( name = "util_test", srcs = ["util_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], # b/74395663 - deps = [ + additional_deps = [ ":base", ":tracking", ":util", + "@absl_py//absl/testing:parameterized", + "@six_archive//:six", "//tensorflow/python:checkpoint_management", "//tensorflow/python:constant_op", "//tensorflow/python:control_flow_ops", @@ -160,9 +157,8 @@ py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras:engine", "//tensorflow/python/keras:layers", - "@absl_py//absl/testing:parameterized", - "@six_archive//:six", ], + tags = ["notsan"], # b/74395663 ) tf_xla_py_test( @@ -188,15 +184,15 @@ tf_xla_py_test( ], ) -py_test( +tf_py_test( name = "util_with_v1_optimizers_test", srcs = ["util_with_v1_optimizers_test.py"], - srcs_version = "PY2AND3", - tags = ["notsan"], # b/74395663 - deps = [ + additional_deps = [ ":base", ":tracking", ":util", + "@absl_py//absl/testing:parameterized", + "@six_archive//:six", "//tensorflow/python:checkpoint_management", "//tensorflow/python:constant_op", "//tensorflow/python:control_flow_ops", @@ -220,7 +216,6 @@ py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras:engine", 
"//tensorflow/python/keras:layers", - "@absl_py//absl/testing:parameterized", - "@six_archive//:six", ], + tags = ["notsan"], # b/74395663 ) -- GitLab From 4083cc3b3b9614e5ed013858d8727a2e08df1e25 Mon Sep 17 00:00:00 2001 From: Siju Date: Mon, 31 Dec 2018 13:28:05 +0530 Subject: [PATCH 0090/1095] Update pattern_matcher.h --- tensorflow/compiler/xla/service/pattern_matcher.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h index c362a60d94..83a73a90fb 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher.h +++ b/tensorflow/compiler/xla/service/pattern_matcher.h @@ -1878,7 +1878,7 @@ class HloInstructionPattern { // Make this a templated function to work around gcc 4.9.4 template infinite // recursion bug. template - constexpr auto WithShapeEqualTo(const ::xla::Shape* shape) + constexpr auto WithShapeEqualTo(const ::xla::Shape* shape) const -> decltype(this->WithShape(Shape().EqualTo(shape))) { return WithShape(Shape().EqualTo(shape)); } @@ -1886,7 +1886,7 @@ class HloInstructionPattern { // Make this a templated function to work around gcc 4.9.4 template infinite // recursion bug. template - constexpr auto WithShapeCompatibleTo(const ::xla::Shape* shape) + constexpr auto WithShapeCompatibleTo(const ::xla::Shape* shape) const -> decltype(this->WithShape(Shape().CompatibleTo(shape))) { return WithShape(Shape().CompatibleTo(shape)); } -- GitLab From 75557c1e21c89cddb23c66c661655075371ac28b Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 31 Dec 2019 01:02:28 -0800 Subject: [PATCH 0091/1095] compat: Update forward compatibility horizon to 2018-12-31 PiperOrigin-RevId: 227335513 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index c41e58a823..19efe84ca4 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 30) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 31) @tf_export("compat.forward_compatible") -- GitLab From a36fc1c38b9cf0541cffc8ea053dfb8bf01c58e0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 31 Dec 2019 02:44:28 -0800 Subject: [PATCH 0092/1095] Remove NCCL remote config and dummy config. We currently have two ways to disable NCCL support: A) leave TF_NCCL_VERSION env variable undefined B) bazel flag '--config=nonccl' or '--define=no_nccl_support=true' After this change A) will build NCCL from source instead. Add license to other binary targets, now that we ship NCCL with them. 
PiperOrigin-RevId: 227342886 --- tensorflow/opensource_only.files | 13 ++++++------- tensorflow/tools/lib_package/BUILD | 2 ++ third_party/gpus/cuda_configure.bzl | 4 ++-- third_party/nccl/archive.BUILD | 2 +- third_party/nccl/nccl_configure.bzl | 23 +++++++++++------------ third_party/nccl/remote.BUILD.tpl | 6 ------ 6 files changed, 22 insertions(+), 28 deletions(-) delete mode 100644 third_party/nccl/remote.BUILD.tpl diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files index 88800c2951..1054c285d6 100644 --- a/tensorflow/opensource_only.files +++ b/tensorflow/opensource_only.files @@ -69,13 +69,6 @@ tensorflow/third_party/toolchains/cpus/arm/BUILD tensorflow/third_party/toolchains/cpus/py3/BUILD tensorflow/third_party/toolchains/cpus/py/BUILD tensorflow/third_party/toolchains/BUILD -tensorflow/third_party/nccl/remote.BUILD.tpl -tensorflow/third_party/nccl/archive.BUILD -tensorflow/third_party/nccl/LICENSE -tensorflow/third_party/nccl/system.BUILD.tpl -tensorflow/third_party/nccl/nccl_configure.bzl -tensorflow/third_party/nccl/build_defs.bzl.tpl -tensorflow/third_party/nccl/BUILD tensorflow/third_party/gpus/BUILD tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl tensorflow/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl @@ -175,6 +168,12 @@ tensorflow/third_party/llvm/expand_cmake_vars.py tensorflow/third_party/llvm/llvm.autogenerated.BUILD tensorflow/third_party/llvm/llvm.bzl tensorflow/third_party/icu/udata.patch +tensorflow/third_party/nccl/archive.BUILD +tensorflow/third_party/nccl/LICENSE +tensorflow/third_party/nccl/system.BUILD.tpl +tensorflow/third_party/nccl/nccl_configure.bzl +tensorflow/third_party/nccl/build_defs.bzl.tpl +tensorflow/third_party/nccl/BUILD tensorflow/third_party/fft2d/BUILD tensorflow/third_party/fft2d/fft.h tensorflow/third_party/fft2d/LICENSE diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 
1186189844..86bd510792 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -162,6 +162,7 @@ genrule( "//conditions:default": [], }) + if_cuda([ "@cub_archive//:LICENSE.TXT", + "@local_config_nccl//:LICENSE", ]) + if_mkl([ "//third_party/mkl:LICENSE", "//third_party/mkl_dnn:LICENSE", @@ -232,6 +233,7 @@ genrule( "//conditions:default": [], }) + if_cuda([ "@cub_archive//:LICENSE.TXT", + "@local_config_nccl//:LICENSE", ]) + if_mkl([ "//third_party/mkl:LICENSE", "//third_party/mkl_dnn:LICENSE", diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index 8aa5b89cdd..8de4fe58e5 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -400,7 +400,7 @@ def _cuda_include_path(repository_ctx, cuda_config): return "\n".join(inc_entries) -def _enable_cuda(repository_ctx): +def enable_cuda(repository_ctx): if "TF_NEED_CUDA" in repository_ctx.os.environ: enable_cuda = repository_ctx.os.environ["TF_NEED_CUDA"].strip() return enable_cuda == "1" @@ -1545,7 +1545,7 @@ def _create_remote_cuda_repository(repository_ctx, remote_config_repo): def _cuda_autoconf_impl(repository_ctx): """Implementation of the cuda_autoconf repository rule.""" - if not _enable_cuda(repository_ctx): + if not enable_cuda(repository_ctx): _create_dummy_repository(repository_ctx) elif _TF_CUDA_CONFIG_REPO in repository_ctx.os.environ: _create_remote_cuda_repository( diff --git a/third_party/nccl/archive.BUILD b/third_party/nccl/archive.BUILD index 211b794bb0..5901c6b296 100644 --- a/third_party/nccl/archive.BUILD +++ b/third_party/nccl/archive.BUILD @@ -96,7 +96,7 @@ cc_library( ], hdrs = ["nccl.h"], copts = cuda_default_copts() + ["-Wno-vla"], - include_prefix = "third_party/nccl/", + include_prefix = "third_party/nccl", visibility = ["//visibility:public"], deps = [ ":device", diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index 1e6422b49e..f7d7d55383 100644 
--- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -13,7 +13,9 @@ load( "auto_configure_fail", "compute_capabilities", "cuda_toolkit_path", + "enable_cuda", "find_cuda_define", + "get_cpu_value", "matches_version", ) @@ -22,7 +24,7 @@ _NCCL_HDR_PATH = "NCCL_HDR_PATH" _NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH" _TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES" _TF_NCCL_VERSION = "TF_NCCL_VERSION" -_TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO" +_TF_NEED_CUDA = "TF_NEED_CUDA" _DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR" _DEFINE_NCCL_MINOR = "#define NCCL_MINOR" @@ -116,26 +118,23 @@ def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_v header_version = "%s.%s.%s" % (major_version, minor_version, patch_version) if not matches_version(nccl_version, header_version): auto_configure_fail( - ("NCCL library version detected from %s/nccl.h (%s) does not match " + - "TF_NCCL_VERSION (%s). To fix this rerun configure again.") % + ("NCCL library version detected from %s/nccl.h (%s) does not " + + "match TF_NCCL_VERSION (%s). To fix this rerun configure again.") % (header_dir, header_version, nccl_version), ) def _nccl_configure_impl(repository_ctx): """Implementation of the nccl_configure repository rule.""" - if _TF_NCCL_VERSION not in repository_ctx.os.environ: + if not enable_cuda(repository_ctx) or \ + get_cpu_value(repository_ctx) not in ("Linux", "FreeBSD"): # Add a dummy build file to make bazel query happy. repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT) return - if _TF_NCCL_CONFIG_REPO in repository_ctx.os.environ: - # Forward to the pre-configured remote repository. 
- repository_ctx.template("BUILD", _label("remote.BUILD.tpl"), { - "%{target}": repository_ctx.os.environ[_TF_NCCL_CONFIG_REPO], - }) - return + nccl_version = "" + if _TF_NCCL_VERSION in repository_ctx.os.environ: + nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip() - nccl_version = repository_ctx.os.environ[_TF_NCCL_VERSION].strip() if nccl_version == "": # Alias to open source build from @nccl_archive. repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) @@ -179,7 +178,7 @@ nccl_configure = repository_rule( _NCCL_INSTALL_PATH, _TF_NCCL_VERSION, _TF_CUDA_COMPUTE_CAPABILITIES, - _TF_NCCL_CONFIG_REPO, + _TF_NEED_CUDA, ], ) """Detects and configures the NCCL configuration. diff --git a/third_party/nccl/remote.BUILD.tpl b/third_party/nccl/remote.BUILD.tpl deleted file mode 100644 index d66fc5563d..0000000000 --- a/third_party/nccl/remote.BUILD.tpl +++ /dev/null @@ -1,6 +0,0 @@ -licenses(["restricted"]) - -package(default_visibility = ["//visibility:public"]) - -alias(name="LICENSE", actual = "%{target}:LICENSE") -alias(name = "nccl", actual = "%{target}:nccl") -- GitLab From 853d417a891ecdc893bc2c448d426833df74c7e0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 31 Dec 2019 03:21:38 -0800 Subject: [PATCH 0093/1095] Replace deprecated `to_float` with `tf.cast(_, dtypes.float32)`. A noop. 
PiperOrigin-RevId: 227345610 --- tensorflow/contrib/gan/BUILD | 5 ++--- .../gan/python/losses/python/losses_impl.py | 17 +++++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index e1d198fe7b..0fa229880b 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -145,16 +145,15 @@ py_library( "//tensorflow/contrib/framework:framework_py", "//tensorflow/python:array_ops", "//tensorflow/python:clip_ops", + "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", - "//tensorflow/python:gradients", + "//tensorflow/python:gradients_impl", "//tensorflow/python:math_ops", "//tensorflow/python:random_ops", "//tensorflow/python:summary", "//tensorflow/python:tensor_util", "//tensorflow/python:variable_scope", - "//tensorflow/python/ops/distributions", "//tensorflow/python/ops/losses", - "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl.py b/tensorflow/contrib/gan/python/losses/python/losses_impl.py index 930c91786f..1f1ae2df4d 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl.py @@ -38,6 +38,7 @@ from __future__ import print_function from tensorflow.contrib.framework.python.ops import variables as contrib_variables_lib +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops @@ -69,6 +70,10 @@ __all__ = [ ] +def _to_float(tensor): + return math_ops.cast(tensor, dtypes.float32) + + # Wasserstein losses from `Wasserstein GAN` (https://arxiv.org/abs/1701.07875). 
def wasserstein_generator_loss( discriminator_gen_outputs, @@ -98,7 +103,7 @@ def wasserstein_generator_loss( """ with ops.name_scope(scope, 'generator_wasserstein_loss', ( discriminator_gen_outputs, weights)) as scope: - discriminator_gen_outputs = math_ops.to_float(discriminator_gen_outputs) + discriminator_gen_outputs = _to_float(discriminator_gen_outputs) loss = - discriminator_gen_outputs loss = losses.compute_weighted_loss( @@ -144,8 +149,8 @@ def wasserstein_discriminator_loss( with ops.name_scope(scope, 'discriminator_wasserstein_loss', ( discriminator_real_outputs, discriminator_gen_outputs, real_weights, generated_weights)) as scope: - discriminator_real_outputs = math_ops.to_float(discriminator_real_outputs) - discriminator_gen_outputs = math_ops.to_float(discriminator_gen_outputs) + discriminator_real_outputs = _to_float(discriminator_real_outputs) + discriminator_gen_outputs = _to_float(discriminator_gen_outputs) discriminator_real_outputs.shape.assert_is_compatible_with( discriminator_gen_outputs.shape) @@ -647,7 +652,7 @@ def least_squares_generator_loss( """ with ops.name_scope(scope, 'lsq_generator_loss', (discriminator_gen_outputs, real_label)) as scope: - discriminator_gen_outputs = math_ops.to_float(discriminator_gen_outputs) + discriminator_gen_outputs = _to_float(discriminator_gen_outputs) loss = math_ops.squared_difference( discriminator_gen_outputs, real_label) / 2.0 loss = losses.compute_weighted_loss( @@ -702,8 +707,8 @@ def least_squares_discriminator_loss( """ with ops.name_scope(scope, 'lsq_discriminator_loss', (discriminator_gen_outputs, real_label)) as scope: - discriminator_real_outputs = math_ops.to_float(discriminator_real_outputs) - discriminator_gen_outputs = math_ops.to_float(discriminator_gen_outputs) + discriminator_real_outputs = _to_float(discriminator_real_outputs) + discriminator_gen_outputs = _to_float(discriminator_gen_outputs) discriminator_real_outputs.shape.assert_is_compatible_with( discriminator_gen_outputs.shape) 
-- GitLab From 7914dbf7a2b9c2b555e3e55423905bdb22c6d759 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 31 Dec 2019 06:34:09 -0800 Subject: [PATCH 0094/1095] [tfgan] Fix documentation bug classifer_impl. PiperOrigin-RevId: 227356865 --- .../eval/python/classifier_metrics_impl.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py index ea55241b34..31f0d34ed6 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Model evaluation tools for TFGAN. +"""Model evaluation tools for TF-GAN. These methods come from https://arxiv.org/abs/1606.03498, https://arxiv.org/abs/1706.08500, and https://arxiv.org/abs/1801.01401. @@ -795,9 +795,9 @@ def kernel_classifier_distance(real_images, on a classifier. num_classifier_batches: Number of batches to split images in to in order to efficiently run them through the classifier network. - max_estimator_block_size: integer, default 1024. The distance estimator - splits samples into blocks for computational efficiency. Larger values are - more computationally expensive but decrease the variance of the distance + max_block_size: integer, default 1024. The distance estimator splits samples + into blocks for computational efficiency. Larger values are more + computationally expensive but decrease the variance of the distance estimate. dtype: if not None, coerce activations to this dtype before computations. @@ -872,9 +872,9 @@ def kernel_classifier_distance_and_std(real_images, on a classifier. 
num_classifier_batches: Number of batches to split images in to in order to efficiently run them through the classifier network. - max_estimator_block_size: integer, default 1024. The distance estimator - splits samples into blocks for computational efficiency. Larger values are - more computationally expensive but decrease the variance of the distance + max_block_size: integer, default 1024. The distance estimator splits samples + into blocks for computational efficiency. Larger values are more + computationally expensive but decrease the variance of the distance estimate. Having a smaller block size also gives a better estimate of the standard error. dtype: if not None, coerce activations to this dtype before computations. @@ -911,7 +911,7 @@ def kernel_classifier_distance_and_std(real_images, gen_a = array_ops.concat(array_ops.unstack(gen_a), 0) return kernel_classifier_distance_and_std_from_activations( - real_a, gen_a, max_block_size=max_block_size) + real_a, gen_a, max_block_size, dtype) kernel_inception_distance_and_std = functools.partial( @@ -968,14 +968,14 @@ def kernel_classifier_distance_from_activations(real_activations, into blocks for computational efficiency. Larger values are more computationally expensive but decrease the variance of the distance estimate. - dtype: if not None, coerce activations to this dtype before computations. + dtype: If not None, coerce activations to this dtype before computations. Returns: The Kernel Inception Distance. A floating-point scalar of the same type as the output of the activations. 
""" return kernel_classifier_distance_and_std_from_activations( - real_activations, generated_activations, max_block_size=max_block_size)[0] + real_activations, generated_activations, max_block_size, dtype)[0] def kernel_classifier_distance_and_std_from_activations(real_activations, @@ -1030,7 +1030,7 @@ def kernel_classifier_distance_and_std_from_activations(real_activations, computationally expensive but decrease the variance of the distance estimate. Having a smaller block size also gives a better estimate of the standard error. - dtype: if not None, coerce activations to this dtype before computations. + dtype: If not None, coerce activations to this dtype before computations. Returns: The Kernel Inception Distance. A floating-point scalar of the same type @@ -1081,7 +1081,7 @@ def kernel_classifier_distance_and_std_from_activations(real_activations, dim = math_ops.cast(real_activations.shape[1], dtype) def compute_kid_block(i): - 'Compute the ith block of the KID estimate.' + """Computes the ith block of the KID estimate.""" r_s = inds_r[i] r_e = inds_r[i + 1] r = real_activations[r_s:r_e] -- GitLab From 3ae375aa92fbb6155f82393735d0b98d8fb9c1b2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 31 Dec 2019 09:45:15 -0800 Subject: [PATCH 0095/1095] Fix naming in README. PiperOrigin-RevId: 227367542 --- tensorflow/contrib/gan/README.md | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/gan/README.md b/tensorflow/contrib/gan/README.md index 1762353a0d..db7dc51daa 100644 --- a/tensorflow/contrib/gan/README.md +++ b/tensorflow/contrib/gan/README.md @@ -1,7 +1,8 @@ -# TensorFlow-GAN (TFGAN) + +# TensorFlow-GAN (TF-GAN) -TFGAN is a lightweight library for training and evaluating Generative +TF-GAN is a lightweight library for training and evaluating Generative Adversarial Networks (GANs). 
This technique allows you to train a network (called the 'generator') to sample from a distribution, without having to explicitly model the distribution and without writing an explicit loss. For @@ -17,27 +18,27 @@ import tensorflow as tf tfgan = tf.contrib.gan ``` -## Why TFGAN? +## Why TF-GAN? * Easily train generator and discriminator networks with well-tested, flexible [library calls](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/train.py). You can -mix TFGAN, native TF, and other custom frameworks +mix TF-GAN, native TF, and other custom frameworks * Use already implemented [GAN losses and penalties](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/losses/python/losses_impl.py) (ex Wasserstein loss, gradient penalty, mutual information penalty, etc) * [Monitor and visualize](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/eval/python/summaries_impl.py) GAN progress during training, and [evaluate](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py) them * Use already-implemented [tricks](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/features/python/) to stabilize and improve training * Develop based on examples of [common GAN setups](https://github.com/tensorflow/models/tree/master/research/gan/) -* Use the TFGAN-backed [GANEstimator](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py) to easily train a GAN model -* Improvements in TFGAN infrastructure will automatically benefit your TFGAN project +* Use the TF-GAN-backed [GANEstimator](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/estimator/python/gan_estimator_impl.py) to easily train a GAN model +* Improvements in TF-GAN infrastructure will automatically benefit your TF-GAN project * Stay up-to-date with research as we add more algorithms -## What are the TFGAN components? +## What are the TF-GAN components? 
-TFGAN is composed of several parts which were design to exist independently. +TF-GAN is composed of several parts which were designed to exist independently. These include the following main pieces (explained in detail below). * [core](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/train.py): provides the main infrastructure needed to train a GAN. Training occurs in four phases, and each phase can be completed by custom-code or by using a - TFGAN library call. + TF-GAN library call. * [features](https://www.tensorflow.org/code/tensorflow/contrib/gan/python/features/python/): Many common GAN operations and normalization techniques are implemented for @@ -56,7 +57,7 @@ These include the following main pieces (explained in detail below). generative models. * [examples](https://github.com/tensorflow/models/tree/master/research/gan/) - and [tutorial](http://https://github.com/tensorflow/models/tree/master/research/gan/tutorial.ipynb): See examples of how to use TFGAN + and [tutorial](https://github.com/tensorflow/models/tree/master/research/gan/tutorial.ipynb): See examples of how to use TF-GAN to make GAN training easier, or use the more complicated examples to jumpstart your own project. These include unconditional and conditional GANs, InfoGANs, adversarial losses on existing networks, and image-to-image @@ -64,7 +65,7 @@ These include the following main pieces (explained in detail below). ## Training a GAN model -Training in TFGAN typically consists of the following steps: +Training in TF-GAN typically consists of the following steps: 1. Specify the input to your networks. 1. Set up your generator and discriminator using a `GANModel`. @@ -72,12 +73,12 @@ Training in TFGAN typically consists of the following steps: 1. Create your train ops using a `GANTrainOps`. 1. Run your train ops.
-At each stage, you can either use TFGAN's convenience functions, or you can +At each stage, you can either use TF-GAN's convenience functions, or you can perform the step manually for fine-grained control. We provide examples below. There are various types of GAN setups. For instance, you can train a generator to sample unconditionally from a learned distribution, or you can condition on -extra information such as a class label. TFGAN is compatible with many setups, +extra information such as a class label. TF-GAN is compatible with many setups, and we demonstrate a few below: ### Examples @@ -255,9 +256,9 @@ with variable_scope.variable_scope(dis_scope, reuse=True): discriminator_real_outputs = discriminator_fn(images) generator_variables = variables_lib.get_trainable_variables(gen_scope) discriminator_variables = variables_lib.get_trainable_variables(dis_scope) -# Depending on what TFGAN features you use, you don't always need to supply +# Depending on what TF-GAN features you use, you don't always need to supply # every `GANModel` field. At a minimum, you need to include the discriminator -# outputs and variables if you want to use TFGAN to construct losses. +# outputs and variables if you want to use TF-GAN to construct losses. 
gan_model = tfgan.GANModel( generator_inputs, generated_data, -- GitLab From ceb64c15db61855183d3b88504b5469af0f423c7 Mon Sep 17 00:00:00 2001 From: Dheeraj Rajaram Reddy Date: Tue, 1 Jan 2019 01:25:55 +0530 Subject: [PATCH 0096/1095] Add take_while experimental dataset op (tests pending) --- .../core/kernels/data/experimental/BUILD | 14 + .../experimental/take_while_dataset_op.cc | 242 ++++++++++++++++++ .../core/ops/experimental_dataset_ops.cc | 11 + .../python/data/experimental/__init__.py | 1 + tensorflow/python/data/experimental/ops/BUILD | 14 + .../data/experimental/ops/take_while_ops.py | 79 ++++++ 6 files changed, 361 insertions(+) create mode 100644 tensorflow/core/kernels/data/experimental/take_while_dataset_op.cc create mode 100644 tensorflow/python/data/experimental/ops/take_while_ops.py diff --git a/tensorflow/core/kernels/data/experimental/BUILD b/tensorflow/core/kernels/data/experimental/BUILD index 7433303f77..4ce14b0140 100644 --- a/tensorflow/core/kernels/data/experimental/BUILD +++ b/tensorflow/core/kernels/data/experimental/BUILD @@ -293,6 +293,19 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "take_while_dataset_op", + srcs = ["take_while_dataset_op.cc"], + deps = [ + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:dataset_ops_op_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels/data:captured_function", + ], +) + tf_kernel_library( name = "to_tf_record_op", srcs = ["to_tf_record_op.cc"], @@ -365,6 +378,7 @@ tf_kernel_library( ":sql_dataset_op", ":stats_aggregator_ops", ":stats_dataset_ops", + ":take_while_dataset_op", ":threadpool_dataset_op", ":to_tf_record_op", ":unbatch_dataset_op", diff --git a/tensorflow/core/kernels/data/experimental/take_while_dataset_op.cc b/tensorflow/core/kernels/data/experimental/take_while_dataset_op.cc new file mode 100644 index 0000000000..271b919bc9 --- /dev/null +++ 
b/tensorflow/core/kernels/data/experimental/take_while_dataset_op.cc @@ -0,0 +1,242 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/framework/partial_tensor_shape.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/data/captured_function.h" + +namespace tensorflow { +namespace data { +namespace { + +// See documentation in ../../../ops/experimental_dataset_ops.cc for a +// high-level description of the following op.
+ +class TakeWhileDatasetOp : public UnaryDatasetOpKernel { + public: + explicit TakeWhileDatasetOp(OpKernelConstruction* ctx) + : UnaryDatasetOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("predicate", &func_)); + OP_REQUIRES_OK( + ctx, ctx->GetAttr("preserve_cardinality", &preserve_cardinality_)); + } + + void MakeDataset(OpKernelContext* ctx, DatasetBase* input, + DatasetBase** output) override { + std::unique_ptr captured_func; + OP_REQUIRES_OK(ctx, CapturedFunction::Create(func_, ctx, "other_arguments", + &captured_func)); + + // TODO (squadrick): check short-circuit + *output = new Dataset(ctx, input, func_, std::move(captured_func), + preserve_cardinality_); + } + + private: + class Dataset : public DatasetBase { + public: + Dataset(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, + std::unique_ptr captured_func, + bool preserve_cardinality) + : DatasetBase(DatasetContext(ctx)), + input_(input), + func_(func), + captured_func_(std::move(captured_func)), + preserve_cardinality_(preserve_cardinality) { + input_->Ref(); + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override { + return std::unique_ptr( + new Iterator({this, strings::StrCat(prefix, "::TakeWhile")})); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + const std::vector& output_shapes() const override { + return input_->output_shapes(); + } + + string DebugString() const override { return "TakeWhileDatasetOp::Dataset"; } + + int64 Cardinality() const override { return input_->Cardinality(); } + + protected: + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override { + TF_RETURN_IF_ERROR(b->AddFunction(ctx, func_.name())); + Node* input_node; + TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_node)); + + std::vector other_arguments; + 
other_arguments.reserve(captured_func_->captured_inputs().size()); + DataTypeVector other_arguments_types; + other_arguments_types.reserve(captured_func_->captured_inputs().size()); + for (const Tensor& t : captured_func_->captured_inputs()) { + Node* node; + DatasetBase* input; + Status s = GetDatasetFromVariantTensor(t, &input); + if (s.ok()) { + TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input, &node)); + } else { + TF_RETURN_IF_ERROR(b->AddTensor(t, &node)); + } + other_arguments.emplace_back(node); + other_arguments_types.emplace_back(t.dtype()); + } + AttrValue f_attr; + b->BuildAttrValue(func_, &f_attr); + + AttrValue other_arguments_types_attr; + b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr); + + AttrValue preserve_cardinality_attr; + b->BuildAttrValue(preserve_cardinality_, &preserve_cardinality_attr); + + TF_RETURN_IF_ERROR(b->AddDataset( + this, {std::make_pair(0, input_node)}, + {std::make_pair(1, other_arguments)}, + {std::make_pair("predicate", f_attr), + std::make_pair("Targuments", other_arguments_types_attr), + std::make_pair("preserve_cardinality", + preserve_cardinality_attr)}, + output)); + return Status::OK(); + } + + private: + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params) {} + + Status Initialize(IteratorContext* ctx) override { + TF_RETURN_IF_ERROR( + dataset()->input_->MakeIterator(ctx, prefix(), &input_impl_)); + return dataset()->captured_func_->Instantiate( + ctx, &instantiated_captured_func_); + } + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + { + tf_shared_lock l(mu_); + if(!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + TF_RETURN_IF_ERROR( + input_impl_->GetNext(ctx, out_tensors, end_of_sequence)); + } + if (*end_of_sequence) { + mutex_lock l(mu_); + input_impl_.reset(); + return Status::OK(); + } + + std::vector bool_output; + + Status s = 
instantiated_captured_func_->RunWithBorrowedArgs( + ctx, *out_tensors, &bool_output); + + if (s.ok()) { + if(bool_output.size() != 1 || bool_output[0].dtype() != DT_BOOL || + bool_output[0].NumElements() != 1) { + return errors::InvalidArgument( + "`predicate` must returns a scalar bool tensor."); + } + auto cond = bool_output[0].scalar()(); + if (!cond) { // predicate is false + *end_of_sequence = true; + return Status::OK(); + } + } else if (errors::IsOutOfRange(s)) { + if (dataset()->preserve_cardinality_) { + // To guarantee that the transformation preserves the cardinality of + // the dataset, we convert `OutOfRange` to `InvalidArgument` as the + // former may be interpreted by a caller as the end of sequence. + return errors::InvalidArgument( + "Function invocation produced OutOfRangeError: ", + s.error_message()); + } else { + // `f` may deliberately raise `errors::OutOfRange` to indicate + // that we should terminate the iteration early. + *end_of_sequence = true; + return Status::OK(); + } + } + return s; + } + + protected: + std::shared_ptr CreateNode( + IteratorContext* ctx, model::Node::Args args) const override { + return model::MakeKnownRatioNode(std::move(args), + /*ratio=*/1); + } + + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (input_impl_) + TF_RETURN_IF_ERROR(SaveInput(writer, input_impl_)); + else + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impls_empty"), "")); + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (reader->Contains(full_name("input_impls_empty"))) + input_impl_.reset(); + else + TF_RETURN_IF_ERROR(RestoreInput(ctx, reader, input_impl_)); + return Status::OK(); + } + + private: + mutex mu_; + std::unique_ptr input_impl_ GUARDED_BY(mu_); + std::unique_ptr instantiated_captured_func_; + }; + + const DatasetBase* const input_; + const NameAttrList func_; + const std::unique_ptr 
captured_func_; + const bool preserve_cardinality_; + }; + + NameAttrList func_; + bool preserve_cardinality_; +}; + +REGISTER_KERNEL_BUILDER(Name("ExperimentalTakeWhileDataset").Device(DEVICE_CPU), + TakeWhileDatasetOp); + +} // namespace +} // namespace data +} // namespace tensorflow diff --git a/tensorflow/core/ops/experimental_dataset_ops.cc b/tensorflow/core/ops/experimental_dataset_ops.cc index f904e2536d..d10a13f06f 100644 --- a/tensorflow/core/ops/experimental_dataset_ops.cc +++ b/tensorflow/core/ops/experimental_dataset_ops.cc @@ -352,6 +352,17 @@ REGISTER_OP("ExperimentalStatsAggregatorSummary") .Output("summary: string") .SetShapeFn(shape_inference::ScalarShape); +REGISTER_OP("ExperimentalTakeWhileDataset") + .Input("input_dataset: variant") + .Input("other_arguments: Targuments") + .Output("handle: variant") + .Attr("predicate: func") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .Attr("preserve_cardinality: bool = false") + .SetShapeFn(shape_inference::ScalarShape); + REGISTER_OP("ExperimentalUnbatchDataset") .Input("input_dataset: variant") .Output("handle: variant") diff --git a/tensorflow/python/data/experimental/__init__.py b/tensorflow/python/data/experimental/__init__.py index ffc2e5ef5f..016acefa1f 100644 --- a/tensorflow/python/data/experimental/__init__.py +++ b/tensorflow/python/data/experimental/__init__.py @@ -115,6 +115,7 @@ from tensorflow.python.data.experimental.ops.shuffle_ops import shuffle_and_repe from tensorflow.python.data.experimental.ops.stats_aggregator import StatsAggregator from tensorflow.python.data.experimental.ops.stats_ops import latency_stats from tensorflow.python.data.experimental.ops.stats_options import StatsOptions +from tensorflow.python.data.experimental.ops.take_while_ops import take_while from tensorflow.python.data.experimental.ops.threading_options import ThreadingOptions from tensorflow.python.data.experimental.ops.unique import 
unique from tensorflow.python.data.experimental.ops.writers import TFRecordWriter diff --git a/tensorflow/python/data/experimental/ops/BUILD b/tensorflow/python/data/experimental/ops/BUILD index 60c20e0bcf..95e9509678 100644 --- a/tensorflow/python/data/experimental/ops/BUILD +++ b/tensorflow/python/data/experimental/ops/BUILD @@ -354,6 +354,19 @@ py_library( ], ) +py_library( + name = "take_while_ops", + srcs = ["take_while_ops.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:experimental_dataset_ops_gen", + "//tensorflow/python:framework_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:function", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + py_library( name = "threading_options", srcs = ["threading_options.py"], @@ -454,6 +467,7 @@ py_library( ":shuffle_ops", ":sleep", ":stats_ops", + "take_while_ops", ":threadpool", ":unique", ":writers", diff --git a/tensorflow/python/data/experimental/ops/take_while_ops.py b/tensorflow/python/data/experimental/ops/take_while_ops.py new file mode 100644 index 0000000000..edbecd0666 --- /dev/null +++ b/tensorflow/python/data/experimental/ops/take_while_ops.py @@ -0,0 +1,79 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""take-while dataset transformation.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import structure as structure_lib +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_experimental_dataset_ops +from tensorflow.python.util.tf_export import tf_export + + +class _TakeWhileDataset(dataset_ops.UnaryUnchangedStructureDataset): + """A dataset that stops iteration when `predicate` returns false.""" + + def __init__(self, input_dataset, predicate): + """See `take_while()` for details.""" + + self._input_dataset = input_dataset + wrapped_func = dataset_ops.StructuredFunctionWrapper( + predicate, + self._transformation_name(), + dataset=self._input_dataset) + + if not wrapped_func.output_structure.is_compatible_with( + structure_lib.TensorStructure(dtypes.bool, [])): + raise ValueError("`predicate` must return a scalar boolean tensor.") + + self._predicate = wrapped_func + + variant_tensor = gen_experimental_dataset_ops.experimental_take_while_dataset( + self._input_dataset._variant_tensor, + other_arguments=self._predicate.function.captured_inputs, + predicate=self._predicate.function, + preserve_cardinality=True, + **dataset_ops.flat_structure(self)) + super(_TakeWhileDataset, self).__init__(input_dataset, variant_tensor) + + def _functions(self): + return [self._predicate] + + def _transformation_name(self): + return "tf.data.experimental.take_while()" + + +@tf_export("data.experimental.take_while") +def take_while(predicate): + """A transformation that stops dataset iteration based on a `predicate` condition + + Args: + predicate: A function that maps a nested structure of tensors + (having shapes and types defined by 
`self.output_shapes` and + `self.output_types`) to a scalar `tf.bool` tensor. + + Returns: + A `Dataset` transformation function, which can be passed to + `tf.data.Dataset.apply`. + """ + def _apply_fn(dataset): + return _TakeWhileDataset(dataset, predicate) + + return _apply_fn -- GitLab From 15d49deb3c5d4048718af19a2e0e0279e8f204d5 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 1 Jan 2019 00:16:30 +0000 Subject: [PATCH 0097/1095] Update eigen library to 88fc23324517 to fix 24457 This fix updates eigen library to 88fc23324517 so that the issue raised in 24457 could be fixed. This fix fixes 24457. Signed-off-by: Yong Tang --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 11ce55feda..d32caceb9e 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -136,11 +136,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "eigen_archive", build_file = clean_dep("//third_party:eigen.BUILD"), - sha256 = "753fbb58d0a49b6bcbcfb126ebfa2e21fc97f7471529ba835a096008ce588d8a", - strip_prefix = "eigen-eigen-9f48e814419e", + sha256 = "9de38f2d162c51599b802f7c36d9f3773980d19ac908c61638f8344d2c10e1ca", + strip_prefix = "eigen-eigen-88fc23324517", urls = [ - "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/9f48e814419e.tar.gz", - "https://bitbucket.org/eigen/eigen/get/9f48e814419e.tar.gz", + "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/88fc23324517.tar.gz", + "https://bitbucket.org/eigen/eigen/get/88fc23324517.tar.gz", ], ) -- GitLab From d60d04b68a91ac3086b93d8a83c148db8e84edc6 Mon Sep 17 00:00:00 2001 From: "A.
Unique TensorFlower" Date: Tue, 1 Jan 2019 01:02:27 -0800 Subject: [PATCH 0098/1095] compat: Update forward compatibility horizon to 2019-01-01 PiperOrigin-RevId: 227410769 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 19efe84ca4..2284d0021c 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 31) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 1, 1) @tf_export("compat.forward_compatible") -- GitLab From c9de778aa12004428ae99186965655c97c8a1c5a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 1 Jan 2019 04:09:09 -0800 Subject: [PATCH 0099/1095] More `TFGAN` -> `TF-GAN`. PiperOrigin-RevId: 227422079 --- tensorflow/contrib/gan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/gan/__init__.py b/tensorflow/contrib/gan/__init__.py index f1946c7f92..ea51e8a569 100644 --- a/tensorflow/contrib/gan/__init__.py +++ b/tensorflow/contrib/gan/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TFGAN is a lightweight library for training and evaluating GANs. +"""TF-GAN is a lightweight library for training and evaluating GANs. 
In addition to providing the infrastructure for easily training and evaluating GANS, this library contains modules for a TFGAN-backed Estimator, -- GitLab From 34b98c8f0ef3ee852e90dcebdde935ceae31dfe0 Mon Sep 17 00:00:00 2001 From: Siju Date: Tue, 1 Jan 2019 18:55:56 +0530 Subject: [PATCH 0100/1095] Unused variable removed in softmax_op_functor.h ^ --- tensorflow/core/kernels/softmax_op_functor.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/kernels/softmax_op_functor.h b/tensorflow/core/kernels/softmax_op_functor.h index c8bc1ad3bb..218698f3ff 100644 --- a/tensorflow/core/kernels/softmax_op_functor.h +++ b/tensorflow/core/kernels/softmax_op_functor.h @@ -57,7 +57,6 @@ struct SoftmaxEigenImpl { Eigen::DSizes one_by_class(1, num_classes); #else Eigen::IndexList > along_class; - Eigen::IndexList > depth_dim; Eigen::IndexList > batch_by_one; batch_by_one.set(0, batch_size); Eigen::IndexList, int> one_by_class; -- GitLab From fcffdde4d6a918a7971631b444e8fc0cb8b8244b Mon Sep 17 00:00:00 2001 From: Siju Date: Tue, 1 Jan 2019 19:19:48 +0530 Subject: [PATCH 0101/1095] SynchronousMemcpy changed to SynchronousMemcpyD2H and SynchronousMemcpyH2D --- .../core/common_runtime/gpu/gpu_debug_allocator.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc index 989ddbe4af..7268d6cf6a 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc @@ -44,7 +44,8 @@ bool CheckMask(se::StreamExecutor* exec, void* ptr, int64* mask) { se::DeviceMemory gpu_ptr{se::DeviceMemoryBase{ptr, MASK_BYTES}}; int64 tmp[MASK_WORDS]; - if (!exec->SynchronousMemcpy(&tmp, gpu_ptr, MASK_BYTES)) { + Status result = exec->SynchronousMemcpyD2H(gpu_ptr, MASK_BYTES, tmp); + if (!result.ok()) { LOG(FATAL) << "Could not copy debug mask"; } @@ -63,7 +64,8 @@ bool 
CheckMask(se::StreamExecutor* exec, void* ptr, int64* mask) { void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) { se::DeviceMemory gpu_ptr{se::DeviceMemoryBase{ptr, MASK_BYTES}}; - if (!exec->SynchronousMemcpy(&gpu_ptr, mask, MASK_BYTES)) { + Status result = exec->SynchronousMemcpyH2D(mask, MASK_BYTES, &gpu_ptr); + if (!result.ok()) { LOG(FATAL) << "Could not copy debug mask"; } } @@ -171,7 +173,9 @@ void* GPUNanResetAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { se::DeviceMemory nan_ptr{ se::DeviceMemoryBase{static_cast(allocated_ptr), req_size}}; - if (!stream_exec_->SynchronousMemcpy(&nan_ptr, &nans[0], req_size)) { + Status result = stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, + &nan_ptr); + if (!result.ok()) { LOG(ERROR) << "Could not initialize to NaNs"; } @@ -185,7 +189,9 @@ void GPUNanResetAllocator::DeallocateRaw(void* ptr) { std::nanf("")); se::DeviceMemory nan_ptr{ se::DeviceMemoryBase{static_cast(ptr), req_size}}; - if (!stream_exec_->SynchronousMemcpy(&nan_ptr, &nans[0], req_size)) { + Status result = stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, + &nan_ptr); + if (!result.ok()) { LOG(ERROR) << "Could not initialize to NaNs"; } } -- GitLab From 3c80e17e79c1920dbba1201d0c50a818772f7e1f Mon Sep 17 00:00:00 2001 From: Siju Date: Tue, 1 Jan 2019 19:23:23 +0530 Subject: [PATCH 0102/1095] Removed unnecessary spaces --- tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc index 7268d6cf6a..77737a76b6 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_debug_allocator.cc @@ -173,7 +173,7 @@ void* GPUNanResetAllocator::AllocateRaw(size_t alignment, size_t num_bytes) { se::DeviceMemory nan_ptr{ se::DeviceMemoryBase{static_cast(allocated_ptr), req_size}}; - Status 
result = stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, + Status result = stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, &nan_ptr); if (!result.ok()) { LOG(ERROR) << "Could not initialize to NaNs"; @@ -189,7 +189,7 @@ void GPUNanResetAllocator::DeallocateRaw(void* ptr) { std::nanf("")); se::DeviceMemory nan_ptr{ se::DeviceMemoryBase{static_cast(ptr), req_size}}; - Status result = stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, + Status result = stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, &nan_ptr); if (!result.ok()) { LOG(ERROR) << "Could not initialize to NaNs"; -- GitLab From dd06c3959737752fe84cd133e0b5137a8ee2d956 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 1 Jan 2019 05:56:40 -0800 Subject: [PATCH 0103/1095] More `TFGAN` -> `TF-GAN`. PiperOrigin-RevId: 227427693 --- tensorflow/contrib/gan/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/gan/__init__.py b/tensorflow/contrib/gan/__init__.py index ea51e8a569..1e6000898f 100644 --- a/tensorflow/contrib/gan/__init__.py +++ b/tensorflow/contrib/gan/__init__.py @@ -24,7 +24,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# Collapse TFGAN into a tiered namespace. +# Collapse TF-GAN into a tiered namespace. 
from tensorflow.contrib.gan.python import estimator from tensorflow.contrib.gan.python import eval # pylint:disable=redefined-builtin from tensorflow.contrib.gan.python import features -- GitLab From 5356b6898b512d3202088299ab422208fc5f8bb5 Mon Sep 17 00:00:00 2001 From: Dheeraj Rajaram Reddy Date: Tue, 1 Jan 2019 22:26:40 +0530 Subject: [PATCH 0104/1095] Make required changes Remove preserve_cardinality Update license year Propagate predicate's error to the caller Inline _transformation_name --- .../experimental/take_while_dataset_op.cc | 46 ++++--------------- .../core/ops/experimental_dataset_ops.cc | 1 - .../data/experimental/ops/take_while_ops.py | 8 +--- 3 files changed, 12 insertions(+), 43 deletions(-) diff --git a/tensorflow/core/kernels/data/experimental/take_while_dataset_op.cc b/tensorflow/core/kernels/data/experimental/take_while_dataset_op.cc index 271b919bc9..b18cc32051 100644 --- a/tensorflow/core/kernels/data/experimental/take_while_dataset_op.cc +++ b/tensorflow/core/kernels/data/experimental/take_while_dataset_op.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -33,8 +33,6 @@ class TakeWhileDatasetOp : public UnaryDatasetOpKernel { explicit TakeWhileDatasetOp(OpKernelConstruction* ctx) : UnaryDatasetOpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("predicate", &func_)); - OP_REQUIRES_OK( - ctx, ctx->GetAttr("preserve_cardinality", &preserve_cardinality_)); } void MakeDataset(OpKernelContext* ctx, DatasetBase* input, @@ -44,22 +42,19 @@ class TakeWhileDatasetOp : public UnaryDatasetOpKernel { &captured_func)); // TODO (squadrick): check short-circuit - *output = new Dataset(ctx, input, func_, std::move(captured_func), - preserve_cardinality_); + *output = new Dataset(ctx, input, func_, std::move(captured_func)); } private: class Dataset : public DatasetBase { public: Dataset(OpKernelContext* ctx, const DatasetBase* input, - const NameAttrList& func, - std::unique_ptr captured_func, - bool preserve_cardinality) + const NameAttrList& func, + std::unique_ptr captured_func) : DatasetBase(DatasetContext(ctx)), input_(input), func_(func), - captured_func_(std::move(captured_func)), - preserve_cardinality_(preserve_cardinality) { + captured_func_(std::move(captured_func)) { input_->Ref(); } @@ -74,13 +69,14 @@ class TakeWhileDatasetOp : public UnaryDatasetOpKernel { const DataTypeVector& output_dtypes() const override { return input_->output_dtypes(); } + const std::vector& output_shapes() const override { return input_->output_shapes(); } string DebugString() const override { return "TakeWhileDatasetOp::Dataset"; } - int64 Cardinality() const override { return input_->Cardinality(); } + int64 Cardinality() const override { return kUnknownCardinality; } protected: Status AsGraphDefInternal(SerializationContext* ctx, @@ -112,16 +108,11 @@ class TakeWhileDatasetOp : public UnaryDatasetOpKernel { AttrValue other_arguments_types_attr; b->BuildAttrValue(other_arguments_types, &other_arguments_types_attr); - AttrValue preserve_cardinality_attr; - b->BuildAttrValue(preserve_cardinality_, &preserve_cardinality_attr); - 
TF_RETURN_IF_ERROR(b->AddDataset( this, {std::make_pair(0, input_node)}, {std::make_pair(1, other_arguments)}, {std::make_pair("predicate", f_attr), - std::make_pair("Targuments", other_arguments_types_attr), - std::make_pair("preserve_cardinality", - preserve_cardinality_attr)}, + std::make_pair("Targuments", other_arguments_types_attr)}, output)); return Status::OK(); } @@ -168,27 +159,12 @@ class TakeWhileDatasetOp : public UnaryDatasetOpKernel { return errors::InvalidArgument( "`predicate` must returns a scalar bool tensor."); } - auto cond = bool_output[0].scalar()(); - if (!cond) { // predicate is false - *end_of_sequence = true; - return Status::OK(); - } - } else if (errors::IsOutOfRange(s)) { - if (dataset()->preserve_cardinality_) { - // To guarantee that the transformation preserves the cardinality of - // the dataset, we convert `OutOfRange` to `InvalidArgument` as the - // former may be interpreted by a caller as the end of sequence. - return errors::InvalidArgument( - "Function invocation produced OutOfRangeError: ", - s.error_message()); - } else { - // `f` may deliberately raise `errors::OutOfRange` to indicate - // that we should terminate the iteration early. 
+ if (!bool_output[0].scalar()()) { *end_of_sequence = true; return Status::OK(); } } - return s; + return s; // propagate error to caller } protected: @@ -227,11 +203,9 @@ class TakeWhileDatasetOp : public UnaryDatasetOpKernel { const DatasetBase* const input_; const NameAttrList func_; const std::unique_ptr captured_func_; - const bool preserve_cardinality_; }; NameAttrList func_; - bool preserve_cardinality_; }; REGISTER_KERNEL_BUILDER(Name("ExperimentalTakeWhileDataset").Device(DEVICE_CPU), diff --git a/tensorflow/core/ops/experimental_dataset_ops.cc b/tensorflow/core/ops/experimental_dataset_ops.cc index d10a13f06f..9ac50ffc72 100644 --- a/tensorflow/core/ops/experimental_dataset_ops.cc +++ b/tensorflow/core/ops/experimental_dataset_ops.cc @@ -360,7 +360,6 @@ REGISTER_OP("ExperimentalTakeWhileDataset") .Attr("Targuments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") - .Attr("preserve_cardinality: bool = false") .SetShapeFn(shape_inference::ScalarShape); REGISTER_OP("ExperimentalUnbatchDataset") diff --git a/tensorflow/python/data/experimental/ops/take_while_ops.py b/tensorflow/python/data/experimental/ops/take_while_ops.py index edbecd0666..224eb8aa77 100644 --- a/tensorflow/python/data/experimental/ops/take_while_ops.py +++ b/tensorflow/python/data/experimental/ops/take_while_ops.py @@ -1,4 +1,4 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -36,7 +36,7 @@ class _TakeWhileDataset(dataset_ops.UnaryUnchangedStructureDataset): self._input_dataset = input_dataset wrapped_func = dataset_ops.StructuredFunctionWrapper( predicate, - self._transformation_name(), + "tf.data.experimental.take_while()", dataset=self._input_dataset) if not wrapped_func.output_structure.is_compatible_with( @@ -49,16 +49,12 @@ class _TakeWhileDataset(dataset_ops.UnaryUnchangedStructureDataset): self._input_dataset._variant_tensor, other_arguments=self._predicate.function.captured_inputs, predicate=self._predicate.function, - preserve_cardinality=True, **dataset_ops.flat_structure(self)) super(_TakeWhileDataset, self).__init__(input_dataset, variant_tensor) def _functions(self): return [self._predicate] - def _transformation_name(self): - return "tf.data.experimental.take_while()" - @tf_export("data.experimental.take_while") def take_while(predicate): -- GitLab From b9abbc5424e6e883a1ec481344e3b080a29c49b8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 1 Jan 2019 09:05:30 -0800 Subject: [PATCH 0105/1095] More `TFGAN` to `TF-GAN`. 
PiperOrigin-RevId: 227437819 --- tensorflow/contrib/gan/python/eval/__init__.py | 2 +- tensorflow/contrib/gan/python/eval/python/classifier_metrics.py | 2 +- .../contrib/gan/python/eval/python/classifier_metrics_test.py | 2 +- tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py | 2 +- tensorflow/contrib/gan/python/eval/python/summaries.py | 2 +- tensorflow/contrib/gan/python/eval/python/summaries_impl.py | 2 +- tensorflow/contrib/gan/python/eval/python/summaries_test.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/gan/python/eval/__init__.py b/tensorflow/contrib/gan/python/eval/__init__.py index f86b851305..92e9abf8a3 100644 --- a/tensorflow/contrib/gan/python/eval/__init__.py +++ b/tensorflow/contrib/gan/python/eval/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TFGAN evaluation module. +"""TF-GAN evaluation module. This module supports techniques such as Inception Score, Frechet Inception distance, and Sliced Wasserstein distance. diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics.py index 1c872626a9..a52e899114 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Model evaluation tools for TFGAN.""" +"""Model evaluation tools for TF-GAN.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py index dbff1d2a36..ab2afb21fd 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for TFGAN classifier_metrics.""" +"""Tests for TF-GAN classifier_metrics.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py index 523968bed9..326fcb3cdb 100644 --- a/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py +++ b/tensorflow/contrib/gan/python/eval/python/sliced_wasserstein.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Model evaluation tools for TFGAN.""" +"""Model evaluation tools for TF-GAN.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/eval/python/summaries.py b/tensorflow/contrib/gan/python/eval/python/summaries.py index ecfdb39499..1b202dfc97 100644 --- a/tensorflow/contrib/gan/python/eval/python/summaries.py +++ b/tensorflow/contrib/gan/python/eval/python/summaries.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Common TFGAN summaries.""" +"""Common TF-GAN summaries.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/eval/python/summaries_impl.py b/tensorflow/contrib/gan/python/eval/python/summaries_impl.py index f9995bb19d..9f448d3a16 100644 --- a/tensorflow/contrib/gan/python/eval/python/summaries_impl.py +++ b/tensorflow/contrib/gan/python/eval/python/summaries_impl.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Common TFGAN summaries.""" +"""Common TF-GAN summaries.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/eval/python/summaries_test.py b/tensorflow/contrib/gan/python/eval/python/summaries_test.py index 54a6f8d4d9..53fc7cb8ed 100644 --- a/tensorflow/contrib/gan/python/eval/python/summaries_test.py +++ b/tensorflow/contrib/gan/python/eval/python/summaries_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for TFGAN summaries.""" +"""Tests for TF-GAN summaries.""" from __future__ import absolute_import from __future__ import division -- GitLab From 07ab5fa7f9dab3fb644a0261382c95e5be072741 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 1 Jan 2019 09:36:41 -0800 Subject: [PATCH 0106/1095] assertTrue(isinstance(xxx)) -> assertIsInstance PiperOrigin-RevId: 227439314 --- .../python/eval/python/classifier_metrics_test.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py index ab2afb21fd..bd17571a05 100644 --- a/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py +++ b/tensorflow/contrib/gan/python/eval/python/classifier_metrics_test.py @@ -234,7 +234,7 @@ class ClassifierMetricsTest(test.TestCase, parameterized.TestCase): else: logits = classifier_metrics.run_inception(img, _get_dummy_graphdef()) - self.assertTrue(isinstance(logits, ops.Tensor)) + self.assertIsInstance(logits, ops.Tensor) logits.shape.assert_is_compatible_with([batch_size, 1001]) # Check that none of the model variables are trainable. @@ -258,7 +258,7 @@ class ClassifierMetricsTest(test.TestCase, parameterized.TestCase): img, _get_dummy_graphdef(), output_tensor=classifier_metrics.INCEPTION_FINAL_POOL) - self.assertTrue(isinstance(pool, ops.Tensor)) + self.assertIsInstance(pool, ops.Tensor) pool.shape.assert_is_compatible_with([batch_size, 2048]) # Check that none of the model variables are trainable. 
@@ -276,8 +276,8 @@ class ClassifierMetricsTest(test.TestCase, parameterized.TestCase): classifier_metrics.INCEPTION_FINAL_POOL ]) - self.assertTrue(isinstance(logits, ops.Tensor)) - self.assertTrue(isinstance(pool, ops.Tensor)) + self.assertIsInstance(logits, ops.Tensor) + self.assertIsInstance(pool, ops.Tensor) logits.shape.assert_is_compatible_with([batch_size, 1001]) pool.shape.assert_is_compatible_with([batch_size, 2048]) @@ -290,7 +290,7 @@ class ClassifierMetricsTest(test.TestCase, parameterized.TestCase): classifier_metrics.inception_score, array_ops.zeros([6, 299, 299, 3]), num_batches=3) - self.assertTrue(isinstance(score, ops.Tensor)) + self.assertIsInstance(score, ops.Tensor) score.shape.assert_has_rank(0) # Check that none of the model variables are trainable. @@ -302,7 +302,7 @@ class ClassifierMetricsTest(test.TestCase, parameterized.TestCase): distance = _run_with_mock( classifier_metrics.frechet_inception_distance, img, img) - self.assertTrue(isinstance(distance, ops.Tensor)) + self.assertIsInstance(distance, ops.Tensor) distance.shape.assert_has_rank(0) # Check that none of the model variables are trainable. @@ -314,7 +314,7 @@ class ClassifierMetricsTest(test.TestCase, parameterized.TestCase): distance = _run_with_mock(classifier_metrics.kernel_inception_distance, img, img) - self.assertTrue(isinstance(distance, ops.Tensor)) + self.assertIsInstance(distance, ops.Tensor) distance.shape.assert_has_rank(0) # Check that none of the model variables are trainable. 
-- GitLab From 4e07d4ae73de5632bb49514a5b486140bdd8516e Mon Sep 17 00:00:00 2001 From: Dheeraj Rajaram Reddy Date: Tue, 1 Jan 2019 23:47:28 +0530 Subject: [PATCH 0107/1095] Add take_while serialization test --- .../take_while_dataset_serialization_test.py | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tensorflow/python/data/experimental/kernel_tests/serialization/take_while_dataset_serialization_test.py diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/take_while_dataset_serialization_test.py b/tensorflow/python/data/experimental/kernel_tests/serialization/take_while_dataset_serialization_test.py new file mode 100644 index 0000000000..9b5498cca5 --- /dev/null +++ b/tensorflow/python/data/experimental/kernel_tests/serialization/take_while_dataset_serialization_test.py @@ -0,0 +1,45 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the TakeWhileDataset serialization.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.experimental.kernel_tests.serialization import dataset_serialization_test_base +from tensorflow.python.data.experimental.ops import take_while_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.platform import test + + +class TakeWhileDatasetSerializationTest( + dataset_serialization_test_base.DatasetSerializationTestBase): + + def _build_dataset(self, num_elements, upper_bound): + return dataset_ops.Dataset.range(num_elements).apply( + take_while_ops.take_while(lambda x: x < upper_bound)) + + def testCore(self): + def run_test(num_elem1, num_elem2, upper_bound): + self.run_core_tests(lambda: self._build_dataset(num_elem1, upper_bound), + lambda: self._build_dataset(num_elem2, upper_bound), + upper_bound) + + run_test(23, 10, 7) + run_test(10, 50, 0) + run_test(25, 30, 25) + + +if __name__ == "__main__": + test.main() -- GitLab From d1959dddd107c79db8c8d33d0a37ebc9fe8b0a21 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Tue, 1 Jan 2019 13:19:42 -0800 Subject: [PATCH 0108/1095] Remove explicit dependencies tests have on keras-related targets. Changing all of these deps to be on a single large target. This is because when we refactor the build files we will want only a few public targets that things outside of Keras can depend on. And, it will make copybara transforms much easier (can swap out single target with dummy in opensource). 
PiperOrigin-RevId: 227449197 --- tensorflow/compiler/tests/BUILD | 3 +++ tensorflow/contrib/checkpoint/python/BUILD | 22 +++++++-------- tensorflow/contrib/feature_column/BUILD | 31 ++++++++++------------ tensorflow/python/BUILD | 7 +---- 4 files changed, 29 insertions(+), 34 deletions(-) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index fa02cf9cbe..f80cb1812f 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -230,6 +230,7 @@ tf_xla_py_test( "//tensorflow/python:framework", "//tensorflow/python:platform_test", "//tensorflow/python:random_ops", + "//tensorflow/python:standard_ops", ], ) @@ -677,6 +678,7 @@ tf_xla_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", "//tensorflow/python:random_ops", + "//tensorflow/python:standard_ops", ], ) @@ -826,6 +828,7 @@ tf_xla_py_test( ":xla_test", "//tensorflow/python:framework", "//tensorflow/python:platform_test", + "//tensorflow/python:standard_ops", "//tensorflow/python:stateless_random_ops", ], ) diff --git a/tensorflow/contrib/checkpoint/python/BUILD b/tensorflow/contrib/checkpoint/python/BUILD index ada4168726..4e529322c7 100644 --- a/tensorflow/contrib/checkpoint/python/BUILD +++ b/tensorflow/contrib/checkpoint/python/BUILD @@ -2,7 +2,7 @@ licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//tensorflow:internal"]) -load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "tf_py_test") py_library( name = "checkpoint", @@ -27,17 +27,17 @@ py_library( ], ) -py_test( +tf_py_test( name = "containers_test", srcs = ["containers_test.py"], - deps = [ + additional_deps = [ ":containers", + "@six_archive//:six", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_test_lib", "//tensorflow/python:resource_variable_ops", "//tensorflow/python/training/checkpointable:base", "//tensorflow/python/training/checkpointable:util", - "@six_archive//:six", ], ) @@ -53,18 +53,18 @@ 
py_library( ], ) -py_test( +tf_py_test( name = "python_state_test", srcs = ["python_state_test.py"], - deps = [ + additional_deps = [ ":python_state", + "//third_party/py/numpy", "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", "//tensorflow/python:session", "//tensorflow/python:variables", "//tensorflow/python/eager:test", "//tensorflow/python/training/checkpointable:util", - "//third_party/py/numpy", ], ) @@ -80,10 +80,10 @@ py_library( ], ) -py_test( +tf_py_test( name = "split_dependency_test", srcs = ["split_dependency_test.py"], - deps = [ + additional_deps = [ ":split_dependency", "//tensorflow/python:array_ops", "//tensorflow/python:framework_test_lib", @@ -106,10 +106,10 @@ py_library( ], ) -py_test( +tf_py_test( name = "visualize_test", srcs = ["visualize_test.py"], - deps = [ + additional_deps = [ ":visualize", "//tensorflow/python:constant_op", "//tensorflow/python:resource_variable_ops", diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index 4c1d1a29f2..4e29e25599 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -6,7 +6,7 @@ package( licenses(["notice"]) # Apache 2.0 -load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "tf_py_test") py_library( name = "feature_column_py", @@ -37,13 +37,13 @@ py_library( ], ) -py_test( +tf_py_test( name = "sequence_feature_column_test", srcs = ["python/feature_column/sequence_feature_column_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ + additional_deps = [ ":sequence_feature_column", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", @@ -53,17 +53,14 @@ py_test( "//tensorflow/python:sparse_tensor", "//tensorflow/python:training", "//tensorflow/python/feature_column:feature_column_py", - "//third_party/py/numpy", - 
"@absl_py//absl/testing:parameterized", ], + tags = ["no_pip"], ) -py_test( +tf_py_test( name = "sequence_feature_column_integration_test", srcs = ["python/feature_column/sequence_feature_column_integration_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ + additional_deps = [ ":sequence_feature_column", "//tensorflow/python:client_testlib", "//tensorflow/python:framework_ops", @@ -73,6 +70,7 @@ py_test( "//tensorflow/python/feature_column:feature_column_py", "//tensorflow/python/keras:layers", ], + tags = ["no_pip"], ) py_library( @@ -94,14 +92,14 @@ py_library( ], ) -py_test( +tf_py_test( name = "sequence_feature_column_v2_test", srcs = ["python/feature_column/sequence_feature_column_v2_test.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - deps = [ + additional_deps = [ ":sequence_feature_column", ":sequence_feature_column_v2", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", @@ -112,7 +110,6 @@ py_test( "//tensorflow/python:training", "//tensorflow/python/feature_column:feature_column_py", "//tensorflow/python/feature_column:feature_column_v2_test", - "//third_party/py/numpy", - "@absl_py//absl/testing:parameterized", ], + tags = ["no_pip"], ) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 7503c899aa..6724177d17 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1067,6 +1067,7 @@ py_library( name = "extra_py_tests_deps", srcs_version = "PY2AND3", deps = [ + ":keras_lib", "//third_party/py/numpy", "@six_archive//:six", ], @@ -1081,7 +1082,6 @@ py_library( ":client", ":errors", ":framework_for_generated_wrappers", - ":layers_base", ":platform", ":platform_test", ":pywrap_tensorflow", @@ -1096,7 +1096,6 @@ py_library( "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", "//tensorflow/python/eager:tape", - "//tensorflow/python/keras:layers", 
"//third_party/py/numpy", "@six_archive//:six", ], @@ -3311,7 +3310,6 @@ cuda_py_test( ":framework_test_lib", ":functional_ops", ":gradients", - ":layers", ":list_ops", ":math_grad", ":math_ops", @@ -3708,7 +3706,6 @@ tf_py_test( ":framework", ":framework_for_generated_wrappers", ":framework_test_lib", - ":layers", ":math_ops", ":metrics", ":platform", @@ -4982,7 +4979,6 @@ tf_py_test( ":variable_scope", ":variables", "//third_party/py/numpy", - "//tensorflow/python/feature_column:feature_column_py", ], ) @@ -5753,7 +5749,6 @@ cuda_py_test( ":constant_op", ":dtypes", ":functional_ops", - ":layers", ":math_ops", ":nn", ":ops", -- GitLab From efb14346f9344db2f07c94428ee86f86100720b0 Mon Sep 17 00:00:00 2001 From: Ruoxin Sang Date: Tue, 1 Jan 2019 17:58:48 -0800 Subject: [PATCH 0109/1095] Set `_is_compiled` attribute for Keras custom compile for predict. PiperOrigin-RevId: 227461814 --- .../python/keras/engine/distributed_training_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/python/keras/engine/distributed_training_utils.py b/tensorflow/python/keras/engine/distributed_training_utils.py index a4fc2cf196..231c59ba93 100644 --- a/tensorflow/python/keras/engine/distributed_training_utils.py +++ b/tensorflow/python/keras/engine/distributed_training_utils.py @@ -671,6 +671,12 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode): def _custom_compile_for_predict(model): """Custom compile for TPU predict mode.""" + if not model.built: + # Model is not compilable because it does not know its number of inputs + # and outputs, nor their shapes and names. We will compile after the first + # time the model gets called on training data. + return + model._is_compiled = True model.total_loss = None model._fit_function = None model._eval_function = None -- GitLab From b277e326766ac3a0eff3e72e9e53dadcc31674e2 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 1 Jan 2019 23:48:05 -0800 Subject: [PATCH 0110/1095] TFGAN -> TF-GAN PiperOrigin-RevId: 227481763 --- tensorflow/contrib/gan/python/estimator/__init__.py | 2 +- .../contrib/gan/python/estimator/python/gan_estimator_test.py | 2 +- tensorflow/contrib/gan/python/estimator/python/head_impl.py | 2 +- tensorflow/contrib/gan/python/estimator/python/head_test.py | 2 +- .../gan/python/estimator/python/stargan_estimator_impl.py | 2 +- .../gan/python/estimator/python/stargan_estimator_test.py | 2 +- .../gan/python/estimator/python/tpu_gan_estimator_impl.py | 2 +- .../gan/python/estimator/python/tpu_gan_estimator_test.py | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/gan/python/estimator/__init__.py b/tensorflow/contrib/gan/python/estimator/__init__.py index 75cccb5ea0..c1433749c1 100644 --- a/tensorflow/contrib/gan/python/estimator/__init__.py +++ b/tensorflow/contrib/gan/python/estimator/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TFGAN estimator module. +"""TF-GAN estimator module. GANEstimator provides all the infrastructure support of a TensorFlow Estimator with the feature support of TFGAN. diff --git a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py index 5a3d29cf0b..5b9c54e43a 100644 --- a/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/gan_estimator_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for TFGAN's estimator.py.""" +"""Tests for TF-GAN's estimator.py.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py index 1a0ee6dfc4..cbe990b476 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A TFGAN-backed GAN Estimator.""" +"""A TF-GAN-backed GAN Estimator.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/estimator/python/head_test.py b/tensorflow/contrib/gan/python/estimator/python/head_test.py index 8205bc889d..5b50234a0e 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for TFGAN's head.py.""" +"""Tests for TF-GAN's head.py.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_impl.py index f60e16bc04..2a485e7d47 100644 --- a/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_impl.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A TFGAN-backed StarGAN Estimator.""" +"""A TF-GAN-backed StarGAN Estimator.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_test.py index 2ec7938c7c..00ca393c51 100644 --- a/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for TFGAN's stargan_estimator.py.""" +"""Tests for TF-GAN's stargan_estimator.py.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_impl.py b/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_impl.py index bf51b7fc45..8f2a22c78a 100644 --- a/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_impl.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A TFGAN-backed GAN Estimator that works on TPU.""" +"""A TF-GAN-backed GAN Estimator that works on TPU.""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_test.py index 0a08b4386f..9fdcc08334 100644 --- a/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/tpu_gan_estimator_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for TFGAN's TPU Estimator.""" +"""Tests for TF-GAN's TPU Estimator.""" from __future__ import absolute_import from __future__ import division -- GitLab From 5ed6676c75192429132bbc0da179ad3e3367089c Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 2 Jan 2019 01:02:25 -0800 Subject: [PATCH 0111/1095] compat: Update forward compatibility horizon to 2019-01-02 PiperOrigin-RevId: 227487773 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 2284d0021c..949bb0dc0c 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -27,7 +27,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 1, 1) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 1, 2) @tf_export("compat.forward_compatible") -- GitLab From 26d81c48681c17fbfe1c586b54512d8a89062183 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 2 Jan 2019 02:58:21 -0800 Subject: [PATCH 0112/1095] Improve code generated by scatter expander After this change if the indexes dimension is 1 then we remove it and treat the scatter as a scatter with scalar indices. PiperOrigin-RevId: 227497329 --- tensorflow/compiler/xla/service/scatter_expander.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/scatter_expander.cc b/tensorflow/compiler/xla/service/scatter_expander.cc index e8496dbd72..8b9955faf8 100644 --- a/tensorflow/compiler/xla/service/scatter_expander.cc +++ b/tensorflow/compiler/xla/service/scatter_expander.cc @@ -26,7 +26,6 @@ limitations under the License. namespace xla { - // Transposes the given scatter_indices such that the index_vector_dim becomes // the most-minor dimension. 
static StatusOr TransposeIndexVectorDimToLast( @@ -60,6 +59,13 @@ static StatusOr CanonicalizeScatterIndices( TF_ASSIGN_OR_RETURN( HloInstruction * transposed_scatter_indices, TransposeIndexVectorDimToLast(scatter_indices, index_vector_dim)); + if (ShapeUtil::Rank(scatter_indices->shape()) == index_vector_dim + 1 && + scatter_indices->shape().dimensions(index_vector_dim) == 1) { + auto new_shape = + ShapeUtil::DeleteDimension(index_vector_dim, scatter_indices->shape()); + TF_ASSIGN_OR_RETURN(scatter_indices, + MakeReshapeHlo(new_shape, scatter_indices)); + } bool indices_are_scalar = index_vector_dim == scatter_indices->shape().dimensions_size(); @@ -214,9 +220,6 @@ static StatusOr> ScatterLoopBody( HloInstruction* updates = loop_state[2]; bool has_scalar_indices = scatter_indices->shape().dimensions_size() == 1; - CHECK_EQ(has_scalar_indices, - dim_numbers.index_vector_dim() == - scatter->operand(1)->shape().dimensions_size()); // Build a vector form of the induction variable of the while loop. TF_ASSIGN_OR_RETURN( -- GitLab From 2b5cbcdd8d84d9673c9f17b4400ec6d70e2c8d73 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 2 Jan 2019 04:33:33 -0800 Subject: [PATCH 0113/1095] More `TFGAN` -> `TF-GAN`. PiperOrigin-RevId: 227504393 --- tensorflow/contrib/gan/python/estimator/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/gan/python/estimator/__init__.py b/tensorflow/contrib/gan/python/estimator/__init__.py index c1433749c1..03a639165b 100644 --- a/tensorflow/contrib/gan/python/estimator/__init__.py +++ b/tensorflow/contrib/gan/python/estimator/__init__.py @@ -15,7 +15,7 @@ """TF-GAN estimator module. GANEstimator provides all the infrastructure support of a TensorFlow Estimator -with the feature support of TFGAN. +with the feature support of TF-GAN. """ from __future__ import absolute_import -- GitLab From 8ba62643f9e33f6684252f68ffd8c282d96a7262 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 2 Jan 2019 05:14:40 -0800 Subject: [PATCH 0114/1095] Ran build_cleaner. PiperOrigin-RevId: 227507517 --- tensorflow/contrib/gan/BUILD | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index 0fa229880b..97184dabb0 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -569,22 +569,18 @@ py_test( deps = [ ":namedtuples", ":stargan_estimator", - ":tuple_losses", "//tensorflow/contrib/layers:layers_py", - "//tensorflow/contrib/learn", - "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", "//tensorflow/python:metrics", - "//tensorflow/python:parsing_ops", "//tensorflow/python:summary", "//tensorflow/python:training", "//tensorflow/python:training_util", "//tensorflow/python:variable_scope", - "//tensorflow/python/estimator:estimator_py", + "//tensorflow/python/estimator:model_fn", + "//tensorflow/python/estimator:numpy_io", "//third_party/py/numpy", "@absl_py//absl/testing:parameterized", "@six_archive//:six", -- GitLab From 7700b1ebeb981b025eccc6e115b2209107d8b8fa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 2 Jan 2019 05:22:10 -0800 Subject: [PATCH 0115/1095] Fix lint warnings. 
PiperOrigin-RevId: 227508006 --- .../gan/python/estimator/python/stargan_estimator_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_test.py b/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_test.py index 00ca393c51..c00ff43997 100644 --- a/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_test.py +++ b/tensorflow/contrib/gan/python/estimator/python/stargan_estimator_test.py @@ -80,7 +80,7 @@ class StarGetGANModelTest(test.TestCase, parameterized.TestCase): self.assertEqual(input_data, gan_model.input_data) self.assertIsNotNone(gan_model.generated_data) self.assertIsNotNone(gan_model.generated_data_domain_target) - self.assertEqual(1, len(gan_model.generator_variables)) + self.assertLen(gan_model.generator_variables, 1) self.assertIsNotNone(gan_model.generator_scope) self.assertIsNotNone(gan_model.generator_fn) if mode == model_fn_lib.ModeKeys.PREDICT: @@ -109,7 +109,7 @@ class StarGetGANModelTest(test.TestCase, parameterized.TestCase): gan_model.discriminator_input_data_domain_predication) self.assertIsNotNone( gan_model.discriminator_generated_data_domain_predication) - self.assertEqual(2, len(gan_model.discriminator_variables)) # 1 FC layer + self.assertLen(gan_model.discriminator_variables, 2) # 1 FC layer self.assertIsNotNone(gan_model.discriminator_scope) self.assertIsNotNone(gan_model.discriminator_fn) @@ -163,6 +163,7 @@ class GetEstimatorSpecTest(test.TestCase, parameterized.TestCase): @classmethod def setUpClass(cls): + super(GetEstimatorSpecTest, cls).setUpClass() cls._generator_optimizer = training.GradientDescentOptimizer(1.0) cls._discriminator_optimizer = training.GradientDescentOptimizer(1.0) -- GitLab From 44442969c0596728dbdb98035bffc2baf1790f13 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 2 Jan 2019 05:25:21 -0800 Subject: [PATCH 0116/1095] Clang breaks with the libstdc++ version on ubuntu 14.04, so install a newer libstdc++, like we do for the CUDA 9 image already. PiperOrigin-RevId: 227508237 --- .../Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04 | 13 +++++++++++-- third_party/toolchains/BUILD | 2 +- .../toolchains/preconfig/generate/containers.bzl | 2 +- .../toolchains/preconfig/generate/workspace.bzl | 2 +- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04 b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04 index 4fe86066c9..553fbb2860 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04 +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04 @@ -1,7 +1,7 @@ # To push a new version, run: # $ docker build -f Dockerfile.rbe.cuda10.0-cudnn7-ubuntu14.04 \ -# --tag "gcr.io/asci-toolchain/nosla-cuda10.0-cudnn7-ubuntu14.04" . -# $ docker push gcr.io/asci-toolchain/nosla-cuda10.0-cudnn7-ubuntu14.04 +# --tag "gcr.io/tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu14.04" . +# $ docker push gcr.io/tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu14.04 FROM ubuntu:14.04 LABEL maintainer="Manuel Klimek " @@ -68,6 +68,15 @@ RUN ln -s libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 RUN ln -s /usr/lib/x86_64-linux-gnu/libnccl.so /usr/lib/libnccl.so \ && ln -s /usr/lib/x86_64-linux-gnu/libnccl.so /usr/lib/libnccl.so.2 +# Install a newer version of libstdc++, as new clang versions do not work +# with the stock ubuntu 14.04 libstdc++. +RUN apt-get update && \ + apt-get install -y software-properties-common && \ + add-apt-repository ppa:ubuntu-toolchain-r/test -y && \ + apt-get update && \ + apt-get install -y libstdc++-7-dev && \ + rm -rf /var/lib/apt/lists/* + # Copy and run the install scripts. 
COPY install/*.sh /install/ ARG DEBIAN_FRONTEND=noninteractive diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD index 9da417fd5f..28f1e6979c 100644 --- a/third_party/toolchains/BUILD +++ b/third_party/toolchains/BUILD @@ -45,6 +45,6 @@ platform( remote_execution_properties = """ properties: { name: "container-image" - value:"docker://gcr.io/asci-toolchain/nosla-cuda10.0-cudnn7-ubuntu14.04@%s" + value:"docker://gcr.io/tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu14.04@%s" }""" % container_digests["cuda10.0-cudnn7-ubuntu14.04"], ) diff --git a/third_party/toolchains/preconfig/generate/containers.bzl b/third_party/toolchains/preconfig/generate/containers.bzl index c56c6f3346..32d5f29b52 100644 --- a/third_party/toolchains/preconfig/generate/containers.bzl +++ b/third_party/toolchains/preconfig/generate/containers.bzl @@ -1,4 +1,4 @@ container_digests = { "cuda9.0-cudnn7-ubuntu14.04": "sha256:c43ed5341dd765042e0bbd1bf50fadeedd649d1e0c34d81999cb6ce30916cb95", - "cuda10.0-cudnn7-ubuntu14.04": "sha256:919e75247743ae1244d5d72ee9f18090379d4a9035e5853010f6d59d87cd2e8b", + "cuda10.0-cudnn7-ubuntu14.04": "sha256:e36f05f1ff39e39ddf07122e37f2b1895948bb6f7acc3db37a3c496be5e66228", } diff --git a/third_party/toolchains/preconfig/generate/workspace.bzl b/third_party/toolchains/preconfig/generate/workspace.bzl index f30c2f1ae6..a3268585e1 100644 --- a/third_party/toolchains/preconfig/generate/workspace.bzl +++ b/third_party/toolchains/preconfig/generate/workspace.bzl @@ -18,7 +18,7 @@ def _remote_config_workspace(): container_pull( name = "cuda10.0-cudnn7-ubuntu14.04", registry = "gcr.io", - repository = "asci-toolchain/nosla-cuda10.0-cudnn7-ubuntu14.04", + repository = "tensorflow-testing/nosla-cuda10.0-cudnn7-ubuntu14.04", digest = container_digests["cuda10.0-cudnn7-ubuntu14.04"], ) -- GitLab From ab7465e957d2f5b6f8ada982cf91266629092677 Mon Sep 17 00:00:00 2001 From: Dheeraj Rajaram Reddy Date: Wed, 2 Jan 2019 19:23:57 +0530 Subject: [PATCH 0117/1095] 
Add take_while tests --- .../data/experimental/kernel_tests/BUILD | 22 +++++ .../kernel_tests/serialization/BUILD | 18 ++++ .../kernel_tests/take_while_test.py | 94 +++++++++++++++++++ 3 files changed, 134 insertions(+) create mode 100644 tensorflow/python/data/experimental/kernel_tests/take_while_test.py diff --git a/tensorflow/python/data/experimental/kernel_tests/BUILD b/tensorflow/python/data/experimental/kernel_tests/BUILD index 9362a3e8eb..b4196685e2 100644 --- a/tensorflow/python/data/experimental/kernel_tests/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/BUILD @@ -638,6 +638,28 @@ py_library( ], ) +py_test( + name = "take_while_test", + size = "small", + srcs = ["take_while_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:script_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python/data/experimental/ops:take_while_ops", + "//tensorflow/python/data/kernel_tests:test_base", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/eager:context", + "//third_party/py/numpy", + ], +) + py_test( name = "tf_record_writer_test", size = "small", diff --git a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD index 4a2e28f496..00b05e4d30 100644 --- a/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD +++ b/tensorflow/python/data/experimental/kernel_tests/serialization/BUILD @@ -666,6 +666,24 @@ py_test( ], ) +py_test( + name = "take_while_dataset_serialization_test", + size = "small", + srcs = ["take_while_dataset_serialization_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_oss", + "no_pip", + "no_windows", + ], + deps = [ + ":dataset_serialization_test_base", + 
"//tensorflow/python:client_testlib", + "//tensorflow/python/data/experimental/ops:take_while_ops", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + py_test( name = "textline_dataset_serialization_test", size = "medium", diff --git a/tensorflow/python/data/experimental/kernel_tests/take_while_test.py b/tensorflow/python/data/experimental/kernel_tests/take_while_test.py new file mode 100644 index 0000000000..d561a3cd35 --- /dev/null +++ b/tensorflow/python/data/experimental/kernel_tests/take_while_test.py @@ -0,0 +1,94 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for `tf.data.experimental.Dataset.take_while()`.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.data.kernel_tests import test_base +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.experimental.ops import take_while_ops +from tensorflow.python.framework import errors +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import test_util +from tensorflow.python.framework import constant_op +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import functional_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +@test_util.run_all_in_graph_and_eager_modes +class TakeWhileTest(test_base.DatasetTestBase): + + def testTakeWhileDataset(self): + def do_test(num_elements, window_size): + def predicate_func(elem): + return array_ops.shape(elem)[0] > (window_size - 1) + + flatten_func = lambda x: dataset_ops.Dataset.from_tensor_slices(x) + take_while = take_while_ops.take_while(predicate_func) + + dataset = dataset_ops.Dataset.range(num_elements).batch(window_size) + dataset = dataset.apply(take_while).flat_map(flatten_func) + + self.assertDatasetProduces(dataset, + np.arange(int(num_elements / window_size) * window_size)) + + do_test(14, 2) + do_test(15, 2) + do_test(100, 3) + + def testTakeWhileDatasetRange(self): + def get_dataset(num_elemets, upper_bound): + return dataset_ops.Dataset.range(num_elemets).apply( + take_while_ops.take_while(lambda x: x < upper_bound)) + + def do_test(num_elemets, upper_bound): + self.assertDatasetProduces(get_dataset(num_elemets, upper_bound), + np.arange(upper_bound)) + + def out_of_bounds(num_elemets, upper_bound): + with self.assertRaises(errors.OutOfRangeError): + 
self.assertDatasetProduces(get_dataset(num_elemets, upper_bound), + np.arange(upper_bound)) + + do_test(10, 2) + do_test(16, 7) + do_test(100, 99) + out_of_bounds(100, 101) + out_of_bounds(0, 1) + + def testTakeWhileDatasetString(self): + def stringNotEquals(string): + return lambda x: math_ops.not_equal(x, constant_op.constant(string)) + + string = ["this", "is", "the", "test", "for", "strings"] + dataset = dataset_ops.Dataset.from_tensor_slices(string).apply( + take_while_ops.take_while(stringNotEquals("test"))) + + next_element = self.getNext(dataset) + self.assertEqual(b"this", self.evaluate(next_element())) + self.assertEqual(b"is", self.evaluate(next_element())) + self.assertEqual(b"the", self.evaluate(next_element())) + + with self.assertRaises(errors.OutOfRangeError): + self.assertEqual(b"test", self.evaluate(next_element())) + + +if __name__ == "__main__": + test.main() -- GitLab From 7c9c3b1f28fbf8666e47fa51dec4d52b898c32e5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 2 Jan 2019 08:44:47 -0800 Subject: [PATCH 0118/1095] Remove unused values from key-value sort HLOs If a value has no user then there is no good reason to keep it in the sort. Removing it will reduce memory traffic, memory usage and potentially improve performance. 
PiperOrigin-RevId: 227526906 --- tensorflow/compiler/xla/service/BUILD | 1 + .../xla/service/algebraic_simplifier.cc | 77 +++++++++++++++++++ .../xla/service/algebraic_simplifier_test.cc | 68 ++++++++++++++++ .../compiler/xla/service/pattern_matcher.h | 2 +- 4 files changed, 147 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 201646e70d..0db1d4a78d 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1579,6 +1579,7 @@ cc_library( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/core:lib", "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:inlined_vector", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index cad70a8d10..42a4291ce9 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -26,6 +26,7 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/flat_hash_map.h" #include "absl/container/inlined_vector.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" @@ -369,6 +370,11 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { // Tries to convert slice(reshape(X)) into reshape(slice(X)) StatusOr TryToReorderSliceAndReshape(HloInstruction* slice); + // If the sort instruction has a tuple shape then looks for unused output + // values and removes them from the sort instruction. Returns true if the + // graph have been modified. + StatusOr RemoveUnusedOperandFromSort(HloInstruction* sort); + // Current HloComputation instance the AlgebraicSimplifierVisitor is // traversing. 
HloComputation* computation_; @@ -2814,6 +2820,69 @@ Status AlgebraicSimplifierVisitor::HandleSelect(HloInstruction* select) { return Status::OK(); } +StatusOr AlgebraicSimplifierVisitor::RemoveUnusedOperandFromSort( + HloInstruction* sort) { + if (!sort->shape().IsTuple()) { + return false; + } + + if (sort->parent()->root_instruction() == sort) { + // Can't analyse users of the root instruction. + return false; + } + + // Index 0 is the sorting key used by the sort HLO itself. + absl::flat_hash_set used_indices{0}; + for (const HloInstruction* user : sort->users()) { + if (user->opcode() != HloOpcode::kGetTupleElement) { + // Can't analyse users other then get-tuple-element. + return false; + } + used_indices.insert(user->tuple_index()); + } + + if (used_indices.size() == sort->operand_count()) { + // All operands are used. + return false; + } + + std::vector operands{sort->mutable_operand(0)}; + std::vector new_shapes{sort->operand(0)->shape()}; + for (int64 i = 1; i < sort->operand_count(); ++i) { + if (used_indices.count(i)) { + operands.push_back(sort->mutable_operand(i)); + new_shapes.push_back(sort->operand(i)->shape()); + } + } + Shape new_sort_shape = new_shapes.size() == 1 + ? new_shapes[0] + : ShapeUtil::MakeTupleShape(new_shapes); + HloInstruction* new_sort = computation_->AddInstruction( + sort->CloneWithNewOperands(new_sort_shape, operands)); + + // Map from original get-tuple-element tuple index to new HLO instruction + absl::flat_hash_map result_map; + if (new_sort->shape().IsTuple()) { + // Old sort key maps to new sort key. 
+ int64 new_index = 0; + for (int64 i = 0; i < sort->operand_count(); ++i) { + if (used_indices.count(i)) { + result_map[i] = + computation_->AddInstruction(HloInstruction::CreateGetTupleElement( + new_shapes[new_index], new_sort, new_index)); + ++new_index; + } + } + } else { + result_map[0] = new_sort; + } + for (HloInstruction* user : sort->users()) { + TF_RETURN_IF_ERROR( + user->ReplaceAllUsesWith(result_map.at(user->tuple_index()))); + } + return true; +} + Status AlgebraicSimplifierVisitor::HandleSort(HloInstruction* sort) { auto operand = sort->mutable_operand(0); int64 dimension_to_sort = sort->dimensions(0); @@ -2826,6 +2895,14 @@ Status AlgebraicSimplifierVisitor::HandleSort(HloInstruction* sort) { return ReplaceWithNewInstruction( sort, HloInstruction::CreateTuple(sort->operands())); } + + // Remove the unused values from a key-value sort. + TF_ASSIGN_OR_RETURN(bool removed_operand, RemoveUnusedOperandFromSort(sort)); + if (removed_operand) { + changed_ = true; + return Status::OK(); + } + if (!options_.enable_permutation_sort_replacement()) { return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 51ad748ff8..211c5bf05a 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2709,6 +2709,74 @@ TEST_F(AlgebraicSimplifierTest, DontReplacePermutationSortWrongDimensions) { EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); } +TEST_F(AlgebraicSimplifierTest, RemoveUnusedSortOperandArrayResult) { + const char* hlo_string = R"( + HloModule permutation_sort + + ENTRY sort_computation { + keys = f32[64,8732]{1,0} parameter(0) + values = s32[64,8732]{1,0} parameter(1) + sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values), + dimensions={1} + ROOT gte = f32[64,8732]{1,0} get-tuple-element(sort), index=0 + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + 
ParseAndReturnVerifiedModule(hlo_string)); + + AlgebraicSimplifierOptions options(bitcasting_callback()); + AlgebraicSimplifier simplifier(options); + EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, GmockMatch(m::Sort(m::Parameter(0)))); +} + +TEST_F(AlgebraicSimplifierTest, RemoveUnusedSortOperandTuple) { + const char* hlo_string = R"( + HloModule permutation_sort + + ENTRY sort_computation { + keys = f32[64,87] parameter(0) + values.0 = s32[64,87] parameter(1) + values.1 = u32[64,87] parameter(2) + sort = (f32[64,87], s32[64,87], u32[64,87]) sort( + keys, values.0, values.1), + dimensions={1} + gte.0 = f32[64,87] get-tuple-element(sort), index=0 + gte.1 = u32[64,87] get-tuple-element(sort), index=2 + ROOT tuple = (f32[64,87], u32[64,87]) tuple(gte.0, gte.1) + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + AlgebraicSimplifierOptions options(bitcasting_callback()); + AlgebraicSimplifier simplifier(options); + EXPECT_TRUE(simplifier.Run(module.get()).ValueOrDie()); + auto root = module->entry_computation()->root_instruction(); + EXPECT_THAT( + root, + GmockMatch(m::Tuple( + m::GetTupleElement(m::Sort(m::Parameter(0), m::Parameter(2)), 0), + m::GetTupleElement(m::Sort(m::Parameter(0), m::Parameter(2)), 1)))); +} + +TEST_F(AlgebraicSimplifierTest, DontRemoveUnusedSortKey) { + const char* hlo_string = R"( + HloModule permutation_sort + + ENTRY sort_computation { + keys = f32[64,8732]{1,0} parameter(0) + values = s32[64,8732]{1,0} parameter(1) + sort = (f32[64,8732]{1,0}, s32[64,8732]{1,0}) sort(keys, values), dimensions={1} + ROOT gte = s32[64,8732]{1,0} get-tuple-element(sort), index=1 + })"; + TF_ASSERT_OK_AND_ASSIGN(auto module, + ParseAndReturnVerifiedModule(hlo_string)); + + AlgebraicSimplifierOptions options(bitcasting_callback()); + AlgebraicSimplifier simplifier(options); + 
EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie()); +} + TEST_F(AlgebraicSimplifierTest, ReplaceEffectiveScalarKeyValueSortWithTuple) { auto builder = HloComputation::Builder(TestName()); diff --git a/tensorflow/compiler/xla/service/pattern_matcher.h b/tensorflow/compiler/xla/service/pattern_matcher.h index c362a60d94..198c6e1069 100644 --- a/tensorflow/compiler/xla/service/pattern_matcher.h +++ b/tensorflow/compiler/xla/service/pattern_matcher.h @@ -2057,7 +2057,6 @@ XLA_UNOP_PATTERN(SendDone) XLA_UNOP_PATTERN(Sign) XLA_UNOP_PATTERN(Sin) XLA_UNOP_PATTERN(Slice) -XLA_UNOP_PATTERN(Sort) XLA_UNOP_PATTERN(Tanh) XLA_UNOP_PATTERN(Transpose) #undef XLA_UNOP_PATTERN @@ -2238,6 +2237,7 @@ XLA_VARIADIC_OP_PATTERN(Concatenate); XLA_VARIADIC_OP_PATTERN(CustomCall); XLA_VARIADIC_OP_PATTERN(Map) XLA_VARIADIC_OP_PATTERN(Reduce); +XLA_VARIADIC_OP_PATTERN(Sort); XLA_VARIADIC_OP_PATTERN(Tuple); // Helpers for matching non-constant instructions. -- GitLab From 8fabc8091802e4edcd3051b8fba8fdf045388040 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 2 Jan 2019 08:57:10 -0800 Subject: [PATCH 0119/1095] minor refactor PiperOrigin-RevId: 227528417 --- tensorflow/python/grappler/cluster.py | 29 +++++++++------------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/grappler/cluster.py b/tensorflow/python/grappler/cluster.py index 079d07115b..428b52402c 100644 --- a/tensorflow/python/grappler/cluster.py +++ b/tensorflow/python/grappler/cluster.py @@ -71,26 +71,21 @@ class Cluster(object): return self._tf_cluster def ListDevices(self): - """Returns the list of available hardware devices.""" - devices = [] - if self._tf_cluster is not None: - ret_from_swig = tf_cluster.TF_ListDevices(self._tf_cluster) - devices = [] - for raw_dev in ret_from_swig: - devices.append(device_properties_pb2.NamedDevice.FromString(raw_dev)) - return devices + """Returns a list of available hardware devices.""" + if self._tf_cluster is None: + return [] + return [device_properties_pb2.NamedDevice.FromString(device) + for device in tf_cluster.TF_ListDevices(self._tf_cluster)] def ListAvailableOps(self): - """Returns a list of all the available operations (sorted alphatically).""" + """Returns a list of all available operations (sorted alphabetically).""" return tf_cluster.TF_ListAvailableOps() def GetSupportedDevices(self, item): return tf_cluster.TF_GetSupportedDevices(self._tf_cluster, item.tf_item) def EstimatePerformance(self, device): - """Estimate the performance of the specified device.""" - serialized = device.SerializeToString() - return tf_cluster.TF_EstimatePerformance(serialized) + return tf_cluster.TF_EstimatePerformance(device.SerializeToString()) def MeasureCosts(self, item): """Returns the cost of running the specified item. 
@@ -107,10 +102,8 @@ class Cluster(object): return None op_perf_bytes_list, run_time, step_stats_bytes = ret_from_swig - op_perfs = [] - for op_perf_bytes in op_perf_bytes_list: - op_perfs.append( - op_performance_data_pb2.OpPerformance.FromString(op_perf_bytes)) + op_perfs = [op_performance_data_pb2.OpPerformance.FromString(op_perf_bytes) + for op_perf_bytes in op_perf_bytes_list] return (op_perfs, run_time, step_stats_pb2.StepStats.FromString(step_stats_bytes)) @@ -122,11 +115,9 @@ class Cluster(object): Returns: A hashtable indexed by device name. """ with errors.raise_exception_on_not_ok_status() as status: - ret_from_swig = tf_cluster.TF_DeterminePeakMemoryUsage( + return tf_cluster.TF_DeterminePeakMemoryUsage( item.tf_item, self._tf_cluster, status) - return ret_from_swig - @contextlib.contextmanager def Provision(allow_soft_placement=True, -- GitLab From b978ad9b3c0327d406301f1b32f6d6d11688ec6e Mon Sep 17 00:00:00 2001 From: Alexey Radul Date: Wed, 2 Jan 2019 08:59:37 -0800 Subject: [PATCH 0120/1095] Opportunistically teach test decorators in framework/test_util.py to return the decorated function's result, if any. This makes the most sense for decorators that run the decorated function no more than once, since that makes it obvious which result to return. This helps generalize them to sub-test or non-test scenarios where the decorated function may return something valuable. The change should be a no-op for tests, because they return None anyway, and None was what the decorators were unconditionally returning before this change. 
PiperOrigin-RevId: 227528698 --- tensorflow/python/framework/test_util.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 7a6ecaf5a8..21d21cc7f4 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -373,7 +373,7 @@ def skip_if(condition): else: skip = condition if not skip: - fn(*args, **kwargs) + return fn(*args, **kwargs) return wrapper @@ -410,7 +410,7 @@ def enable_control_flow_v2(fn): enable_control_flow_v2_old = control_flow_util.ENABLE_CONTROL_FLOW_V2 control_flow_util.ENABLE_CONTROL_FLOW_V2 = True try: - fn(*args, **kwargs) + return fn(*args, **kwargs) finally: control_flow_util.ENABLE_CONTROL_FLOW_V2 = enable_control_flow_v2_old @@ -594,9 +594,9 @@ def assert_no_new_tensors(f): ops.get_default_graph()._graph_key = outside_graph_key if outside_executed_eagerly: with context.eager_mode(): - f(self, **kwargs) + result = f(self, **kwargs) else: - f(self, **kwargs) + result = f(self, **kwargs) # Make an effort to clear caches, which would otherwise look like leaked # Tensors. context.context()._clear_caches() # pylint: disable=protected-access @@ -610,6 +610,7 @@ def assert_no_new_tensors(f): len(tensors_after), str(tensors_after), ))) + return result return decorator @@ -741,7 +742,7 @@ def assert_no_garbage_created(f): gc.set_debug(gc.DEBUG_SAVEALL) gc.collect() previous_garbage = len(gc.garbage) - f(self, **kwargs) + result = f(self, **kwargs) gc.collect() new_garbage = len(gc.garbage) if new_garbage > previous_garbage: @@ -786,6 +787,7 @@ def assert_no_garbage_created(f): # not hold on to every object in other tests. 
gc.set_debug(previous_debug_flags) gc.enable() + return result return decorator @@ -1074,9 +1076,9 @@ def deprecated_graph_mode_only(func=None): def decorated(self, *args, **kwargs): if tf2.enabled(): with context.graph_mode(): - f(self, *args, **kwargs) + return f(self, *args, **kwargs) else: - f(self, *args, **kwargs) + return f(self, *args, **kwargs) return decorated @@ -1126,7 +1128,7 @@ def run_v1_only(reason, func=None): if tf2.enabled(): self.skipTest(reason) - f(self, *args, **kwargs) + return f(self, *args, **kwargs) return decorated @@ -1163,7 +1165,7 @@ def run_v2_only(func=None): if not tf2.enabled(): self.skipTest("Test is only comptaible in v2") - f(self, *args, **kwargs) + return f(self, *args, **kwargs) return decorated @@ -1196,7 +1198,7 @@ def run_gpu_only(func=None): if not is_gpu_available(): self.skipTest("Test requires GPU") - f(self, *args, **kwargs) + return f(self, *args, **kwargs) return decorated @@ -1229,7 +1231,7 @@ def run_cuda_only(func=None): if not is_gpu_available(cuda_only=True): self.skipTest("Test requires CUDA GPU") - f(self, *args, **kwargs) + return f(self, *args, **kwargs) return decorated -- GitLab From e0068499474cfaa6940db5f80a1f82565520c048 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 2 Jan 2019 10:19:03 -0800 Subject: [PATCH 0121/1095] Standardize some names from `TFGAN` -> `TF-GAN`. A noop. PiperOrigin-RevId: 227540293 --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 282430d123..0a56e69098 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -849,7 +849,7 @@ answered questions, and were part of inspiring discussions. * Remove `tf.contrib.data.Iterator.from_dataset()` method. Use `Dataset.make_initializable_iterator()` instead. * Remove seldom used and unnecessary `tf.contrib.data.Iterator.dispose_op()`. -* Reorder some TFGAN loss functions in a non-backwards compatible way. +* Reorder some TF-GAN loss functions in a non-backwards compatible way. 
## Known Issues * In Python 3, `Dataset.from_generator()` does not support Unicode strings. -- GitLab From 55a21ee6b1ff9ef3ad8f00507650364897093a90 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 2 Jan 2019 10:20:37 -0800 Subject: [PATCH 0122/1095] Internal Change. PiperOrigin-RevId: 227540535 --- tensorflow/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 6724177d17..933ccf1c7b 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -79,6 +79,7 @@ py_library( srcs_version = "PY2AND3", visibility = [ "//tensorflow:__pkg__", + "//tensorflow:internal", "//tensorflow/python/estimator:__subpackages__", "//tensorflow/python/keras:__subpackages__", "//tensorflow/python/tools:__pkg__", -- GitLab From 6cb28e4889aab7124ea792d1d013bc6ae90f28b5 Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Wed, 2 Jan 2019 10:27:12 -0800 Subject: [PATCH 0123/1095] Move TPU Initialization out of the TPU Strategy constructor. 
PiperOrigin-RevId: 227541646 --- tensorflow/contrib/distribute/__init__.py | 2 + tensorflow/contrib/distribute/python/BUILD | 2 +- .../contrib/distribute/python/combinations.py | 18 ++++-- .../contrib/distribute/python/tpu_strategy.py | 64 +++++++++---------- tensorflow/python/training/session_manager.py | 8 ++- 5 files changed, 55 insertions(+), 39 deletions(-) diff --git a/tensorflow/contrib/distribute/__init__.py b/tensorflow/contrib/distribute/__init__.py index 8ec73654e3..4c3f9b8f02 100644 --- a/tensorflow/contrib/distribute/__init__.py +++ b/tensorflow/contrib/distribute/__init__.py @@ -30,6 +30,7 @@ from tensorflow.contrib.distribute.python.monitor import Monitor from tensorflow.contrib.distribute.python.one_device_strategy import OneDeviceStrategy from tensorflow.contrib.distribute.python.parameter_server_strategy import ParameterServerStrategy from tensorflow.contrib.distribute.python.step_fn import * +from tensorflow.contrib.distribute.python.tpu_strategy import initialize_tpu_system from tensorflow.contrib.distribute.python.tpu_strategy import TPUStrategy from tensorflow.python.distribute.cross_device_ops import * from tensorflow.python.distribute.distribute_config import DistributeConfig @@ -58,6 +59,7 @@ _allowed_symbols = [ 'StandardSingleLossStep', 'ReplicaContext', 'TPUStrategy', + 'initialize_tpu_system', 'get_cross_replica_context', 'get_distribution_strategy', 'get_loss_reduction', diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index f27224e46e..78ad3214fb 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -1,8 +1,8 @@ # Implementation of a prototype TF distributed computation library. 
+load("//tensorflow/compiler/tests:build_defs.bzl", "tf_xla_py_test") load("//tensorflow:tensorflow.bzl", "py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_test") -load("//tensorflow/compiler/tests:build_defs.bzl", "tf_xla_py_test") package( default_visibility = [ diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py index 4a934953ad..f6c4291659 100644 --- a/tensorflow/contrib/distribute/python/combinations.py +++ b/tensorflow/contrib/distribute/python/combinations.py @@ -46,7 +46,7 @@ import unittest from absl.testing import parameterized import six -from tensorflow.contrib.cluster_resolver import TPUClusterResolver +from tensorflow.contrib import cluster_resolver from tensorflow.contrib.distribute.python import mirrored_strategy as mirrored_lib from tensorflow.contrib.distribute.python import one_device_strategy as one_device_lib from tensorflow.contrib.distribute.python import tpu_strategy as tpu_lib @@ -321,6 +321,15 @@ class NamedDistribution(object): return self._required_tpu +def _get_tpu_strategy_creator(steps_per_run): + def _create_tpu_strategy(): + resolver = cluster_resolver.TPUClusterResolver("") + tpu_lib.initialize_tpu_system(resolver) + strategy = tpu_lib.TPUStrategy(resolver, steps_per_run=steps_per_run) + return strategy + return _create_tpu_strategy + + # pylint: disable=g-long-lambda default_strategy = NamedDistribution( "Default", @@ -330,13 +339,12 @@ one_device_strategy = NamedDistribution( "OneDeviceCPU", lambda: one_device_lib.OneDeviceStrategy("/cpu:0"), required_gpus=None) tpu_strategy = NamedDistribution( - "TPU", lambda: tpu_lib.TPUStrategy( - TPUClusterResolver(""), steps_per_run=2), + "TPU", _get_tpu_strategy_creator(steps_per_run=2), required_tpu=True) tpu_strategy_one_step = NamedDistribution( - "TPUOneStep", lambda: tpu_lib.TPUStrategy( - TPUClusterResolver(""), steps_per_run=1), + "TPUOneStep", _get_tpu_strategy_creator(steps_per_run=1), required_tpu=True) + 
mirrored_strategy_with_one_cpu = NamedDistribution( "Mirrored1CPU", lambda: mirrored_lib.MirroredStrategy(["/cpu:0"])) diff --git a/tensorflow/contrib/distribute/python/tpu_strategy.py b/tensorflow/contrib/distribute/python/tpu_strategy.py index 10b7ef0407..9cb2e461ab 100644 --- a/tensorflow/contrib/distribute/python/tpu_strategy.py +++ b/tensorflow/contrib/distribute/python/tpu_strategy.py @@ -51,6 +51,29 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest +_TPU_INITIALIZE_SYSTEM_COLLECTION = "TPU_STRATEGY_INITIALIZE" + + +def initialize_tpu_system(cluster_resolver=None): + """Initialize the TPU devices in a separate session and graph. + + Args: + cluster_resolver: A tf.contrib.cluster_resolver.TPUClusterResolver, + which provides information about the TPU cluster. + """ + if cluster_resolver is None: + cluster_resolver = resolver_lib.TPUClusterResolver("") + master = cluster_resolver.master() + + logging.info("Initializing the TPU system.") + session_config = config_pb2.ConfigProto(allow_soft_placement=True) + + with ops.Graph().as_default(): + with session_lib.Session(config=session_config, target=master) as sess: + sess.run([tpu.initialize_system()]) + logging.info("Finished initializing TPU system.") + + def get_tpu_system_metadata(tpu_cluster_resolver): """Retrieves TPU system metadata given a TPUClusterResolver.""" master = tpu_cluster_resolver.master() @@ -164,11 +187,6 @@ class TPUStrategy(distribute_lib.DistributionStrategy): class TPUExtended(distribute_lib.DistributionStrategyExtended): """Implementation of TPUStrategy.""" - # Track what TPU devices have been initialized. This is *intentionally* - # shared across all instances of TPUExtended as we want to keep track of which - # devices are initialized globally. 
- _initialized_devices = [] - def __init__(self, container_strategy, tpu_cluster_resolver=None, @@ -213,32 +231,6 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended): self.steps_per_run = steps_per_run self._require_static_shapes = True - # Initialize the TPU devices. - self._initialize_tpu() - - def _initialize_tpu(self): - """Initialize the TPU devices in a separate session and graph. - - We keep track of all the TPU devices that we're initialized as we should - only be running TPU initialize once for the entire process. - """ - master = self._tpu_cluster_resolver.master() - # Verify TPU has not already been initialized in this process. - if master in TPUExtended._initialized_devices: - logging.info("TPU master %s has already been initialized." % master) - return - - logging.info("Initializing the TPU system.") - session_config = config_pb2.ConfigProto(allow_soft_placement=True) - self._configure(session_config) - with ops.Graph().as_default(): - with session_lib.Session(config=session_config, target=master) as sess: - sess.run([tpu.initialize_system()]) - logging.info("Finized initializing TPU system.") - - # Update Strategy state to make sure we can track device initialization. - TPUExtended._initialized_devices.append(master) - def _validate_colocate_with_variable(self, colocate_with_variable): values.validate_colocate_tpu_variable(colocate_with_variable, self) @@ -468,7 +460,15 @@ class TPUExtended(distribute_lib.DistributionStrategyExtended): # TODO(priyag): Add appopriate call here when eager is supported for TPUs. raise NotImplementedError("Eager mode not supported in TPUStrategy.") else: - return [] + # TODO(jhseu): We need this hack because DistributionStrategies must be + # pickleable for copy.deepcopy(). Remove when initialize_system goes away. 
+ graph = ops.get_default_graph() + tpu_init = graph.get_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION) + if tpu_init: + return tpu_init + graph.add_to_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION, + tpu.initialize_system()) + return graph.get_collection(_TPU_INITIALIZE_SYSTEM_COLLECTION) def _finalize(self): if context.executing_eagerly(): diff --git a/tensorflow/python/training/session_manager.py b/tensorflow/python/training/session_manager.py index 0f68fcfe8b..1ec9639177 100644 --- a/tensorflow/python/training/session_manager.py +++ b/tensorflow/python/training/session_manager.py @@ -25,6 +25,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import checkpoint_management +from tensorflow.python.training import distribution_strategy_context from tensorflow.python.util.tf_export import tf_export @@ -182,7 +183,12 @@ class SessionManager(object): """ self._target = master sess = session.Session(self._target, graph=self._graph, config=config) - + # TODO(jhseu): Delete once tpu.initialize_system() goes away. + initialize_ops = ( + distribution_strategy_context.get_distribution_strategy().initialize() + ) + if initialize_ops: + sess.run(initialize_ops) if checkpoint_dir and checkpoint_filename_with_path: raise ValueError("Can not provide both checkpoint_dir and " "checkpoint_filename_with_path.") -- GitLab From 25ee17bafb33f0f72bea84c1166e20d75955f273 Mon Sep 17 00:00:00 2001 From: Jian Li Date: Wed, 2 Jan 2019 10:43:05 -0800 Subject: [PATCH 0124/1095] Fix a mismatch in fixed point softmax. 
PiperOrigin-RevId: 227544341 --- tensorflow/lite/kernels/activations.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/lite/kernels/activations.cc b/tensorflow/lite/kernels/activations.cc index 13ef94c016..4463a6c5a6 100644 --- a/tensorflow/lite/kernels/activations.cc +++ b/tensorflow/lite/kernels/activations.cc @@ -57,9 +57,9 @@ struct PreluOpData : public OpData { }; namespace { -TfLiteStatus CheckInputQuantParams(TfLiteContext* context, - const TfLiteTensor* input, - const TfLiteTensor* output) { +TfLiteStatus CheckOutputQuantParams(TfLiteContext* context, + const TfLiteTensor* input, + const TfLiteTensor* output) { if (input->type == kTfLiteUInt8) { TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0); TF_LITE_ENSURE(context, output->params.scale == 1. / 256); @@ -236,7 +236,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE(context, num_dims >= 1 && num_dims <= 4); if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) { - if (CheckInputQuantParams(context, input, output) == kTfLiteError) { + if (CheckOutputQuantParams(context, input, output) == kTfLiteError) { return kTfLiteError; } -- GitLab From d7e56c9b27d0e712bd268e1c06d57beb481083e0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 2 Jan 2019 10:54:39 -0800 Subject: [PATCH 0125/1095] Fixes bug in tf.print that prevents it from printing tensors of strings. 
PiperOrigin-RevId: 227546311 --- tensorflow/core/framework/tensor.cc | 24 ++++++++++++------- tensorflow/core/framework/tensor_test.cc | 15 ++++++++---- .../python/kernel_tests/logging_ops_test.py | 13 ++++++++++ tensorflow/python/ops/logging_ops.py | 2 +- 4 files changed, 40 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 7e841489eb..0c96ec8168 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -932,10 +932,18 @@ namespace { // logic is so simple we can just replicate it here, where it is close to its // usage and easy to change later. And there's the extra benefit of not // accessing an 'internal' namespace. -inline const strings::AlphaNum& PrintOneElement(const strings::AlphaNum& a) { +inline const strings::AlphaNum& PrintOneElement(const strings::AlphaNum& a, + bool print_v2) { return a; } -inline float PrintOneElement(const Eigen::half& h) { +inline string PrintOneElement(const string& a, bool print_v2) { + if (print_v2) { + return "\"" + str_util::CEscape(a) + "\""; + } else { + return str_util::CEscape(a); + } +} +inline float PrintOneElement(const Eigen::half& h, bool print_v2) { return static_cast(h); } @@ -957,7 +965,7 @@ void PrintOneDim(int dim_index, const gtl::InlinedVector& shape, return; } if (i > 0) strings::StrAppend(result, " "); - strings::StrAppend(result, PrintOneElement(data[(*data_index)++])); + strings::StrAppend(result, PrintOneElement(data[(*data_index)++], false)); } return; } @@ -1000,7 +1008,7 @@ void PrintOneDimV2(int dim_index, const gtl::InlinedVector& shape, // We have recursed beyond all the dimensions into a single element // of the tensor. 
if (dim_index == num_dims) { - strings::StrAppend(result, PrintOneElement(data[data_index])); + strings::StrAppend(result, PrintOneElement(data[data_index], true)); return; } @@ -1048,7 +1056,7 @@ string SummarizeArray(int64 limit, int64 num_elts, if (shape.empty()) { for (int64 i = 0; i < limit; ++i) { if (i > 0) strings::StrAppend(&ret, " "); - strings::StrAppend(&ret, PrintOneElement(array[i])); + strings::StrAppend(&ret, PrintOneElement(array[i], print_v2)); } if (num_elts > limit) strings::StrAppend(&ret, "..."); return ret; @@ -1123,6 +1131,9 @@ string Tensor::SummarizeValue(int64 max_entries, bool print_v2) const { // will emit "1 0..." which is more compact. return SummarizeArray(limit, num_elts, shape_, data, print_v2); break; + case DT_STRING: + return SummarizeArray(limit, num_elts, shape_, data, print_v2); + break; default: { // All irregular cases string ret; @@ -1134,9 +1145,6 @@ string Tensor::SummarizeValue(int64 max_entries, bool print_v2) const { for (size_t i = 0; i < limit; ++i) { if (i > 0) strings::StrAppend(&ret, " "); switch (dtype()) { - case DT_STRING: - strings::StrAppend(&ret, str_util::CEscape(flat()(i))); - break; case DT_VARIANT: { const Variant& v = flat()(i); strings::StrAppend(&ret, v.DebugString()); diff --git a/tensorflow/core/framework/tensor_test.cc b/tensorflow/core/framework/tensor_test.cc index 713f91fe04..d4aed38761 100644 --- a/tensorflow/core/framework/tensor_test.cc +++ b/tensorflow/core/framework/tensor_test.cc @@ -1370,7 +1370,7 @@ TEST(SummarizeValue, STRING) { EXPECT_EQ("one two three four five", x.SummarizeValue(16)); x = MkTensor(DT_STRING, TensorShape({5, 1, 5}), {"one", "two", "three", "four", "five"}); - EXPECT_EQ("one two three four five one...", x.SummarizeValue(6)); + EXPECT_EQ("[[one two three four five]][[one...]]...", x.SummarizeValue(6)); } TEST(SummarizeValue, INT32_PRINT_V2) { @@ -1423,11 +1423,16 @@ TEST(SummarizeValue, BOOL_PRINT_V2) { TEST(SummarizeValue, STRING_PRINT_V2) { Tensor x = 
MkTensor(DT_STRING, TensorShape({5}), {"one", "two", "three", "four", "five"}); - EXPECT_EQ("[one two three four five]", x.SummarizeValue(16, true)); - EXPECT_EQ("[one two three four five]", x.SummarizeValue(-1, true)); - x = MkTensor(DT_STRING, TensorShape({5, 1, 5}), + EXPECT_EQ("[\"one\" \"two\" \"three\" \"four\" \"five\"]", + x.SummarizeValue(16, true)); + EXPECT_EQ("[\"one\" \"two\" \"three\" \"four\" \"five\"]", + x.SummarizeValue(-1, true)); + EXPECT_EQ("[\"one\" \"two\" ... \"four\" \"five\"]", + x.SummarizeValue(2, true)); + x = MkTensor(DT_STRING, TensorShape({2, 2}), {"one", "two", "three", "four", "five"}); - EXPECT_EQ("[one two three four five one...]", x.SummarizeValue(6, true)); + EXPECT_EQ("[[\"one\" \"two\"]\n [\"three\" \"four\"]]", + x.SummarizeValue(16, true)); } void BM_CreateAndDestroy(int iters) { diff --git a/tensorflow/python/kernel_tests/logging_ops_test.py b/tensorflow/python/kernel_tests/logging_ops_test.py index 85035e5f7d..3896b138c9 100644 --- a/tensorflow/python/kernel_tests/logging_ops_test.py +++ b/tensorflow/python/kernel_tests/logging_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import os +import string import sys import tempfile @@ -37,6 +38,7 @@ from tensorflow.python.ops import string_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test + class LoggingOpsTest(test.TestCase): @test_util.run_deprecated_v1 @@ -80,6 +82,17 @@ class PrintV2Test(test.TestCase): expected = "[0 1 2 ... 7 8 9]" self.assertTrue((expected + "\n") in printed.contents()) + @test_util.run_in_graph_and_eager_modes() + def testPrintOneStringTensor(self): + with self.cached_session(): + tensor = ops.convert_to_tensor([char for char in string.ascii_lowercase]) + with self.captureWritesToStream(sys.stderr) as printed: + print_op = logging_ops.print_v2(tensor) + self.evaluate(print_op) + + expected = "[\"a\" \"b\" \"c\" ... 
\"x\" \"y\" \"z\"]" + self.assertIn((expected + "\n"), printed.contents()) + @test_util.run_in_graph_and_eager_modes() def testPrintOneTensorVarySummarize(self): with self.cached_session(): diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py index 5a948a2194..3cb16eb81e 100644 --- a/tensorflow/python/ops/logging_ops.py +++ b/tensorflow/python/ops/logging_ops.py @@ -263,7 +263,7 @@ def print_v2(*inputs, **kwargs): # If we are only printing a single string scalar, there is no need to format if (len(inputs) == 1 and tensor_util.is_tensor(inputs[0]) and (not isinstance(inputs[0], sparse_tensor.SparseTensor)) - and inputs[0].shape and (inputs[0].dtype == dtypes.string)): + and (inputs[0].shape.ndims == 0)and (inputs[0].dtype == dtypes.string)): formatted_string = inputs[0] # Otherwise, we construct an appropriate template for the tensors we are # printing, and format the template using those tensors. -- GitLab From e859bba35844942f1c60e3ea9f3d82bd574ed7e4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 2 Jan 2019 11:10:49 -0800 Subject: [PATCH 0126/1095] Fix example in documentation for sparse_split. PiperOrigin-RevId: 227549459 --- tensorflow/python/ops/sparse_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index f6265f5e6a..a149d98730 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -806,8 +806,8 @@ def sparse_split(keyword_required=KeywordRequired(), Graphically the output tensors are: output_tensor[0] = - [ a ] - [b c ] + [ a ] + [b c ] output_tensor[1] = [ d e ] -- GitLab From af42cb228793f9a1d3f28f91bdb5334924b4a504 Mon Sep 17 00:00:00 2001 From: Eugene Zhulenev Date: Wed, 2 Jan 2019 11:10:57 -0800 Subject: [PATCH 0127/1095] Enable Eigen custom contraction kernels support. 
PiperOrigin-RevId: 227549475 --- tensorflow/contrib/rnn/BUILD | 11 +++++++++-- tensorflow/contrib/rnn/kernels/blas_gemm.h | 4 ++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD index 44b232e0f2..39b6885968 100644 --- a/tensorflow/contrib/rnn/BUILD +++ b/tensorflow/contrib/rnn/BUILD @@ -227,7 +227,10 @@ tf_custom_op_library( "kernels/lstm_ops_gpu.cu.cc", "kernels/lstm_ops.h", ], - deps = ["//tensorflow/core/kernels:eigen_helpers"], + deps = [ + "//tensorflow/core/kernels:eigen_contraction_kernel", + "//tensorflow/core/kernels:eigen_helpers", + ], ) tf_gen_op_wrapper_py( @@ -249,7 +252,10 @@ tf_custom_op_library( "kernels/gru_ops_gpu.cu.cc", "kernels/gru_ops.h", ], - deps = ["//tensorflow/core/kernels:eigen_helpers"], + deps = [ + "//tensorflow/core/kernels:eigen_contraction_kernel", + "//tensorflow/core/kernels:eigen_helpers", + ], ) tf_gen_op_wrapper_py( @@ -346,6 +352,7 @@ tf_kernel_library( deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core/kernels:eigen_contraction_kernel", "//tensorflow/core/kernels:eigen_helpers", "//third_party/eigen3", ], diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.h b/tensorflow/contrib/rnn/kernels/blas_gemm.h index d37210d4b8..12f3182a6a 100644 --- a/tensorflow/contrib/rnn/kernels/blas_gemm.h +++ b/tensorflow/contrib/rnn/kernels/blas_gemm.h @@ -21,6 +21,10 @@ limitations under the License. #include "tensorflow/core/kernels/eigen_activations.h" #include "tensorflow/core/platform/types.h" +#if defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL) +#include "tensorflow/core/kernels/eigen_contraction_kernel.h" +#endif + namespace tensorflow { class OpKernelContext; namespace functor { -- GitLab From 3f4249ef9c9b0003c6314e6a3449337c7446b110 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 2 Jan 2019 11:24:55 -0800 Subject: [PATCH 0128/1095] Internal change. 
PiperOrigin-RevId: 227551788 --- tensorflow/compiler/xla/client/lib/BUILD | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/client/lib/BUILD b/tensorflow/compiler/xla/client/lib/BUILD index 97fb4b9e0e..966a581981 100644 --- a/tensorflow/compiler/xla/client/lib/BUILD +++ b/tensorflow/compiler/xla/client/lib/BUILD @@ -352,7 +352,11 @@ cc_library( xla_test( name = "quantize_test", srcs = ["quantize_test.cc"], - tags = ["enable_for_xla_interpreter"], + # TODO(b/122119490): re-enable TAP after fixing. + tags = [ + "enable_for_xla_interpreter", + "notap", + ], deps = [ ":quantize", "//tensorflow/compiler/xla:test", -- GitLab From eadd684c5b9752c43a26a5ed1663f195ac126077 Mon Sep 17 00:00:00 2001 From: Pavithra Vijay Date: Wed, 2 Jan 2019 11:27:06 -0800 Subject: [PATCH 0129/1095] Add root mean squared error v2 metric. PiperOrigin-RevId: 227552168 --- tensorflow/python/keras/metrics.py | 47 +++++++++++++++++++++++++ tensorflow/python/keras/metrics_test.py | 35 ++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index 1e0359d9e5..d9842f7810 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -1491,6 +1491,53 @@ class CategoricalHinge(MeanMetricWrapper): return super(CategoricalHinge, cls).from_config(config) +class RootMeanSquaredError(Mean): + """Computes root mean squared error metric between `y_true` and `y_pred`. 
+ + Usage: + + ```python + m = tf.keras.metrics.RootMeanSquaredError() + m.update_state([2., 4., 6.], [1., 3., 2.]) + print('Final result: ', m.result().numpy()) # Final result: 2.449 + ``` + + Usage with tf.keras API: + + ```python + model = keras.models.Model(inputs, outputs) + model.compile('sgd', metrics=[tf.keras.metrics.RootMeanSquaredError()]) + ``` + """ + + def __init__(self, name='root_mean_squared_error', dtype=None): + super(RootMeanSquaredError, self).__init__(name, dtype=dtype) + + def update_state(self, y_true, y_pred, sample_weight=None): + """Accumulates root mean squared error statistics. + + Args: + y_true: The ground truth values. + y_pred: The predicted values. + sample_weight: Optional weighting of each example. Defaults to 1. Can be a + `Tensor` whose rank is either 0, or the same rank as `y_true`, and must + be broadcastable to `y_true`. + + Returns: + Update op. + """ + y_true = math_ops.cast(y_true, self._dtype) + y_pred = math_ops.cast(y_pred, self._dtype) + y_pred, y_true, sample_weight = squeeze_or_expand_dimensions( + y_pred, y_true, sample_weight) + error_sq = math_ops.square(y_pred - y_true) + return super(RootMeanSquaredError, self).update_state( + error_sq, sample_weight=sample_weight) + + def result(self): + return math_ops.sqrt(math_ops.div_no_nan(self.total, self.count)) + + def accuracy(y_true, y_pred): y_pred.get_shape().assert_is_compatible_with(y_true.get_shape()) if y_true.dtype != y_pred.dtype: diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py index b4eb314e40..2bce89fd1a 100644 --- a/tensorflow/python/keras/metrics_test.py +++ b/tensorflow/python/keras/metrics_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import math import os from absl.testing import parameterized import numpy as np @@ -1249,6 +1250,40 @@ class CategoricalHingeTest(test.TestCase): self.assertAllClose(0.5, 
self.evaluate(result), atol=1e-5) +@test_util.run_all_in_graph_and_eager_modes +class RootMeanSquaredErrorTest(test.TestCase): + + def test_config(self): + rmse_obj = metrics.RootMeanSquaredError(name='rmse', dtype=dtypes.int32) + self.assertEqual(rmse_obj.name, 'rmse') + self.assertEqual(rmse_obj._dtype, dtypes.int32) + + rmse_obj2 = metrics.RootMeanSquaredError.from_config(rmse_obj.get_config()) + self.assertEqual(rmse_obj2.name, 'rmse') + self.assertEqual(rmse_obj2._dtype, dtypes.int32) + + def test_unweighted(self): + rmse_obj = metrics.RootMeanSquaredError() + self.evaluate(variables.variables_initializer(rmse_obj.variables)) + y_true = constant_op.constant((2, 4, 6)) + y_pred = constant_op.constant((1, 3, 2)) + + update_op = rmse_obj.update_state(y_true, y_pred) + self.evaluate(update_op) + result = rmse_obj.result() + # error = [-1, -1, -4], square(error) = [1, 1, 16], mean = 18/3 = 6 + self.assertAllClose(math.sqrt(6), result, atol=1e-3) + + def test_weighted(self): + rmse_obj = metrics.RootMeanSquaredError() + self.evaluate(variables.variables_initializer(rmse_obj.variables)) + y_true = constant_op.constant((2, 4, 6, 8)) + y_pred = constant_op.constant((1, 3, 2, 3)) + sample_weight = constant_op.constant((0, 1, 0, 1)) + result = rmse_obj(y_true, y_pred, sample_weight=sample_weight) + self.assertAllClose(math.sqrt(13), self.evaluate(result), atol=1e-3) + + def _get_model(compile_metrics): model_layers = [ layers.Dense(3, activation='relu', kernel_initializer='ones'), -- GitLab From 3da41f3a4d0bcbbd96295b3afe16bdda2043a767 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Wed, 2 Jan 2019 11:27:09 -0800 Subject: [PATCH 0130/1095] [XLA] Implement scalar index form of DS/DUS on GPU and CPU This form is not yet documented, unsupported by HLO passes, and only passes verification under a flag. 
PiperOrigin-RevId: 227552176 --- .../xla/service/elemental_ir_emitter.cc | 31 +++++++++++--- .../llvm_ir/dynamic_update_slice_util.cc | 41 +++++++++++++++---- 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 6f928fcbaa..f84c115e0a 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -1758,9 +1758,18 @@ StatusOr ElementalIrEmitter::EmitElementalDynamicSlice( auto index_typed_const = [&](uint64 c) -> llvm::Constant* { return llvm::ConstantInt::get(index_type, c); }; - llvm_ir::IrArray::Index dim_index(1, index_typed_const(i)); - TF_ASSIGN_OR_RETURN(llvm::Value * start_index_value, - operand_to_generator.at(hlo->operand(1))(dim_index)); + // TODO(b/118437727): Remove the R1 path. + llvm::Value* start_index_value; + if (hlo->operand(1)->shape().rank() == 1) { + llvm_ir::IrArray::Index dim_index(1, index_typed_const(i)); + TF_ASSIGN_OR_RETURN(start_index_value, + operand_to_generator.at(hlo->operand(1))(dim_index)); + } else { + llvm_ir::IrArray::Index zero_index(index_type); + TF_ASSIGN_OR_RETURN( + start_index_value, + operand_to_generator.at(hlo->operand(1 + i))(zero_index)); + } // Clamp the start index so that the sliced portion fits in the operand: // start_index = clamp(start_index, 0, operand_dim_size - output_dim_size) @@ -1905,9 +1914,19 @@ StatusOr ElementalIrEmitter::EmitElementalDynamicUpdateSlice( auto index_typed_const = [&](uint64 c) -> llvm::Constant* { return llvm::ConstantInt::get(index_type, c); }; - llvm_ir::IrArray::Index dim_index(1, index_typed_const(i)); - TF_ASSIGN_OR_RETURN(llvm::Value * start_index_value, - operand_to_generator.at(start_hlo)(dim_index)); + + llvm::Value* start_index_value; + // TODO(b/118437727): Remove the R1 path. 
+ if (hlo->operand(2)->shape().rank() == 1) { + llvm_ir::IrArray::Index dim_index(1, index_typed_const(i)); + TF_ASSIGN_OR_RETURN(start_index_value, + operand_to_generator.at(hlo->operand(2))(dim_index)); + } else { + llvm_ir::IrArray::Index zero_index(index_type); + TF_ASSIGN_OR_RETURN( + start_index_value, + operand_to_generator.at(hlo->operand(2 + i))(zero_index)); + } // Clamp the start index so that the update region fits in the operand. // start_index = clamp(start_index, 0, input_dim_size - update_dim_size) diff --git a/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc b/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc index 1da7794532..c66eaec8fb 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc @@ -36,8 +36,10 @@ bool CanUpdateDynamicSliceInPlace(HloInstruction* dynamic_update_slice, // EmitFusedDynamicUpdateSliceInPlace. // // Emits a sequential loop if launch_dimensions is null. 
+using IndexGenerator = std::function(int64)>; + static Status EmitDynamicUpdateSliceInPlaceImpl( - const Shape& update_shape, const ElementGenerator& start_indices_generator, + const Shape& update_shape, const IndexGenerator& start_indices_generator, bool is_signed, ElementGenerator update_array_generator, const IrArray& output_array, const gpu::LaunchDimensions* launch_dimensions, absl::string_view name, llvm::IRBuilder<>* b) { @@ -47,8 +49,7 @@ static Status EmitDynamicUpdateSliceInPlaceImpl( const int64 rank = output_shape.rank(); IrArray::Index start_index(b->getInt64Ty(), rank); for (int64 i = 0; i < rank; ++i) { - IrArray::Index dim_index({b->getInt64(i)}); - TF_ASSIGN_OR_RETURN(start_index[i], start_indices_generator(dim_index)); + TF_ASSIGN_OR_RETURN(start_index[i], start_indices_generator(i)); llvm::Value* output_dim_size = llvm::ConstantInt::get( start_index[i]->getType(), output_shape.dimensions(i)); llvm::Value* update_dim_size = llvm::ConstantInt::get( @@ -112,9 +113,20 @@ Status EmitDynamicUpdateSliceInPlace(absl::Span operand_arrays, Shape output_shape = output_array.GetShape(); Shape update_shape = update_array.GetShape(); - ElementGenerator start_indices_generator = [&](const IrArray::Index& index) { - return start_indices_array.EmitReadArrayElement(index, b); - }; + IndexGenerator start_indices_generator; + // TODO(b/118437727): Remove the R1 path, and rename the variables. 
+ if (start_indices_array.GetShape().rank() == 1) { + start_indices_generator = [&](int64 index) { + return start_indices_array.EmitReadArrayElement( + IrArray::Index({b->getInt64(index)}), b); + }; + } else { + start_indices_generator = [&](int64 index) { + return operand_arrays[2 + index].EmitReadArrayElement( + IrArray::Index(b->getInt64Ty()), b); + }; + } + ElementGenerator update_array_generator = [&](const IrArray::Index& index) { return update_array.EmitReadArrayElement(index, b); }; @@ -165,8 +177,21 @@ static Status EmitFusedDynamicUpdateSliceInPlaceImpl( elemental_emitter); TF_RETURN_IF_ERROR(dynamic_update_slice->Accept(&fused_emitter)); ElementGenerator update_array_generator = fused_emitter.GetGenerator(update); - ElementGenerator start_indices_generator = - fused_emitter.GetGenerator(start_indices); + + // TODO(b/118437727): Remove the R1 path, and rename the variables. + IndexGenerator start_indices_generator; + if (start_indices->shape().rank() == 1) { + start_indices_generator = [&](int64 index) { + return fused_emitter.GetGenerator(start_indices)( + IrArray::Index({b->getInt64(index)})); + }; + } else { + start_indices_generator = [&](int64 index) { + ElementGenerator element_generator = + fused_emitter.GetGenerator(dynamic_update_slice->operand(2 + index)); + return element_generator(IrArray::Index(b->getInt64Ty())); + }; + } bool is_signed = ShapeUtil::ElementIsSigned(start_indices->shape()); return EmitDynamicUpdateSliceInPlaceImpl( -- GitLab From a552dc59596fa7d17bfcacc1cb4efc60d7db7d43 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Wed, 2 Jan 2019 11:32:05 -0800 Subject: [PATCH 0131/1095] Implements dynamic_size in TensorArray v2. Adds a TensorListResize op and changes implementation of set_item to resize the list if index is out of bounds before calling the SetItem kernel. 
PiperOrigin-RevId: 227553120 --- .../base_api/api_def_TensorListResize.pbtxt | 10 +++ .../python_api/api_def_TensorListResize.pbtxt | 4 ++ tensorflow/core/kernels/list_kernels.cc | 59 +++++++++++++++ tensorflow/core/ops/list_ops.cc | 18 +++++ .../python/kernel_tests/list_ops_test.py | 55 ++++++++++++++ .../kernel_tests/tensor_array_ops_test.py | 25 +++---- tensorflow/python/ops/list_ops.py | 34 ++++++++- tensorflow/python/ops/tensor_array_ops.py | 71 ++++++++----------- tensorflow/python/ops/while_v2.py | 15 +--- 9 files changed, 224 insertions(+), 67 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_TensorListResize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListResize.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_TensorListResize.pbtxt b/tensorflow/core/api_def/base_api/api_def_TensorListResize.pbtxt new file mode 100644 index 0000000000..5b34f8cec7 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_TensorListResize.pbtxt @@ -0,0 +1,10 @@ +op { + graph_op_name: "TensorListResize" + summary: "Resizes the list." + description: <