From ca19b32e4d1574ad29e36dbc164c320aeca80d47 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Wed, 14 Feb 2018 00:13:00 -0800 Subject: [PATCH 001/311] cifar 10 divergance fix and batchnorm unit test fix --- .../core/kernels/mkl_fused_batch_norm_op.cc | 96 +++++++++++++------ tensorflow/core/kernels/mkl_relu_op.cc | 20 +++- 2 files changed, 81 insertions(+), 35 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 8313224d7f..b7dee3fb3e 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -1110,19 +1110,12 @@ class MklFusedBatchNormGradOp : public OpKernel { return; } - if (dnn_shape_src.IsMklTensor()) - depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C); - else - ExtractParams(context); - - memory::format format_m; if (dnn_shape_src.IsMklTensor()) { - if (dnn_shape_src.IsTensorInNCHWFormat()) - format_m = memory::format::nchw; - else - format_m = memory::format::nhwc; + depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C); + } else if (dnn_shape_diff_dst.IsMklTensor()) { + depth_ = dnn_shape_diff_dst.DimSize(MklDnnDims::Dim_C); } else { - format_m = TFDataFormatToMklDnnDataFormat(tensor_format_); + ExtractParams(context); } MklDnnData src(&cpu_engine); @@ -1146,20 +1139,20 @@ class MklFusedBatchNormGradOp : public OpKernel { diff_dst_dims = TFShapeToMklDnnDimsInNCHW(diff_dst_tensor.shape(), tensor_format_); - // set src and diff_dst primitives + // set src and diff_dst primitives according to input layout memory::desc src_md({}, memory::data_undef, memory::format_undef); memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef); - if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) { - if (dnn_shape_src.IsMklTensor()) { - src_md = dnn_shape_src.GetMklLayout(); - diff_dst_md = src_md; - } else { - diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); - src_md = diff_dst_md; - } + if 
(dnn_shape_src.IsMklTensor()) { + src_md = dnn_shape_src.GetMklLayout(); } else { - src_md = memory::desc(src_dims, MklDnnType(), format_m); - diff_dst_md = src_md; + src_md = memory::desc(src_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); + } + if (dnn_shape_diff_dst.IsMklTensor()) { + diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); + } else { + diff_dst_md = memory::desc(diff_dst_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); } src.SetUsrMem(src_md, &src_tensor); diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); @@ -1211,28 +1204,64 @@ class MklFusedBatchNormGradOp : public OpKernel { // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor()) { + + // MKL-DNN's BN primitive does not provide an API to fetch its internal format + // set common_md as OpMem + // src and diff_dst will reorder to common_md + // diff_src will set as common_md + memory::desc common_md({}, memory::data_undef, memory::format_undef); + if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor()) { + common_md = dnn_shape_src.GetMklLayout(); + } else { + common_md = dnn_shape_diff_dst.GetMklLayout(); + } + } else { + common_md = memory::desc(src_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); + } + // if any of src and diff_dst as mkl layout, + // then we set diff_src as mkl layout + if (dnn_shape_src.IsMklTensor() || + dnn_shape_diff_dst.IsMklTensor()) { dnn_shape_diff_src.SetMklTensor(true); - auto diff_src_pd = bnrm_fwd_pd.dst_primitive_desc(); + // set diff_src's mkl layout as common_md + auto diff_src_pd = memory::primitive_desc(common_md, cpu_engine); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), src_dims, - format_m); - dnn_shape_diff_src.SetTfDimOrder(dnn_shape_src.GetDimension(), - tensor_format_); + if 
(dnn_shape_src.IsMklTensor()) { + dnn_shape_diff_src.SetTfLayout( + dnn_shape_src.GetDimension(), + src_dims, + dnn_shape_src.GetTfDataFormat()); + dnn_shape_diff_src.SetTfDimOrder( + dnn_shape_src.GetDimension(), + tensor_format_); + } else { + dnn_shape_diff_src.SetTfLayout( + dnn_shape_diff_dst.GetDimension(), + src_dims, + dnn_shape_diff_dst.GetTfDataFormat()); + dnn_shape_diff_src.SetTfDimOrder( + dnn_shape_diff_dst.GetDimension(), + tensor_format_); + } tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); + // both src and diff_dst are tf layout, + // so get tf shape from anyone should be ok tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, kDiffSrcIndex, &diff_src_tensor, tf_shape_diff_src, dnn_shape_diff_src); - diff_src.SetUsrMem(src_md, diff_src_tensor); + // set diff_src + diff_src.SetUsrMem(common_md, diff_src_tensor); prop_kind pk = prop_kind::backward; auto bnrm_bwd_desc = batch_normalization_backward::desc( - pk, diff_src.GetUsrMemDesc(), src.GetUsrMemDesc(), epsilon_, + pk, common_md, common_md, epsilon_, /* for inference, specify use_global_stats 1. 
on fwd prop, use mean and variance provided as inputs @@ -1245,11 +1274,16 @@ class MklFusedBatchNormGradOp : public OpKernel { auto bnrm_bwd_pd = batch_normalization_backward::primitive_desc( bnrm_bwd_desc, cpu_engine, bnrm_fwd_pd); + std::vector net; + src.CheckReorderToOpMem(memory::primitive_desc(common_md, + cpu_engine), &net); + diff_dst.CheckReorderToOpMem(memory::primitive_desc(common_md, + cpu_engine), &net); + auto bnrm_bwd_op = batch_normalization_backward( bnrm_bwd_pd, src.GetOpMem(), mean.GetOpMem(), variance.GetOpMem(), diff_dst.GetOpMem(), weights_m, diff_src.GetOpMem(), diff_weights_m); - std::vector net; net.push_back(bnrm_bwd_op); stream(stream::kind::eager).submit(net).wait(); diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 51db3991e2..924b9da7e0 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -368,8 +368,11 @@ void MklReluGradOp::Compute(OpKernelContext* context) { mkl_context.MklCleanup(); } + + #else // INTEL_MKL_ML + template class MklReluOpBase : public OpKernel { public: @@ -579,17 +582,26 @@ class MklReluGradOpBase : public OpKernel { // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor() || + dnn_shape_diff_dst.IsMklTensor()) { dnn_shape_diff_src.SetMklTensor(true); auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc(); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), - dnn_shape_src.GetSizesAsMklDnnDims(), - dnn_shape_src.GetTfDataFormat()); + if (dnn_shape_src.IsMklTensor()) { + dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), + dnn_shape_src.GetSizesAsMklDnnDims(), + dnn_shape_src.GetTfDataFormat()); + } else { + dnn_shape_diff_src.SetTfLayout(dnn_shape_diff_dst.GetDimension(), + 
dnn_shape_diff_dst.GetSizesAsMklDnnDims(), + dnn_shape_diff_dst.GetTfDataFormat()); + } tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); + // both src and diff_dst are tf layout, + // so get tf shape from anyone should be ok tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, -- GitLab From 3b08cd35bc108f48b4f63d73af7a53eb8a1169f9 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 10:17:15 -0800 Subject: [PATCH 002/311] Generalize the gather_indices dimension that stores indices This is now exposed as a index_vector_dim dimension number. Also fixed an off-by-one error in ValidateGatherDimensionNumbers in the expression computing output_shape_rank. PiperOrigin-RevId: 187040748 --- .../compiler/xla/service/hlo_instruction.cc | 9 +- .../compiler/xla/service/hlo_instruction.h | 3 +- .../xla/service/hlo_instruction_test.cc | 43 +++- .../compiler/xla/service/shape_inference.cc | 42 ++-- .../xla/service/shape_inference_test.cc | 191 ++++++++++++++---- tensorflow/compiler/xla/xla_data.proto | 4 + .../performance/xla/operation_semantics.md | 61 ++++-- 7 files changed, 274 insertions(+), 79 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index b7dd055d7c..a534d8ff06 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1172,7 +1172,8 @@ bool HloInstruction::HasSideEffect() const { /* static */ GatherDimensionNumbers HloInstruction::MakeGatherDimNumbers( tensorflow::gtl::ArraySlice output_window_dims, tensorflow::gtl::ArraySlice elided_window_dims, - tensorflow::gtl::ArraySlice gather_dims_to_operand_dims) { + tensorflow::gtl::ArraySlice gather_dims_to_operand_dims, + int64 index_vector_dim) { GatherDimensionNumbers gather_dim_numbers; for (int64 output_window_dim : output_window_dims) { 
gather_dim_numbers.add_output_window_dims(output_window_dim); @@ -1184,6 +1185,7 @@ bool HloInstruction::HasSideEffect() const { gather_dim_numbers.add_gather_dims_to_operand_dims(gather_dim_to_input_dim); } + gather_dim_numbers.set_index_vector_dim(index_vector_dim); return gather_dim_numbers; } @@ -3369,9 +3371,12 @@ string HloInstruction::GatherDimensionNumbersToString() const { string gather_dims_to_operand_dims = StrCat( "gather_dims_to_operand_dims={", Join(gather_dimension_numbers_->gather_dims_to_operand_dims(), ","), "}"); + string index_vector_dim = StrCat( + "index_vector_dim=", gather_dimension_numbers_->index_vector_dim()); return Join>( - {output_window_dims, elided_window_dims, gather_dims_to_operand_dims}, + {output_window_dims, elided_window_dims, gather_dims_to_operand_dims, + index_vector_dim}, ", "); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index e4d22e5703..e4c86214c2 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -502,7 +502,8 @@ class HloInstruction { static GatherDimensionNumbers MakeGatherDimNumbers( tensorflow::gtl::ArraySlice output_window_dims, tensorflow::gtl::ArraySlice elided_window_dims, - tensorflow::gtl::ArraySlice gather_dims_to_operand_dims); + tensorflow::gtl::ArraySlice gather_dims_to_operand_dims, + int64 index_vector_dim); // Returns the opcode for this instruction. 
HloOpcode opcode() const { return opcode_; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 32d3ed272b..f2980d309d 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -1271,7 +1271,7 @@ TEST_F(HloInstructionTest, Stringification) { "true_computation=%TransposeDot, false_computation=%TransposeDot"); } -TEST_F(HloInstructionTest, StringifyGather) { +TEST_F(HloInstructionTest, StringifyGather_0) { Shape input_tensor_shape = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); Shape gather_indices_tensor_shape = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 5}); @@ -1291,7 +1291,8 @@ TEST_F(HloInstructionTest, StringifyGather) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26})); HloModule module(TestName()); @@ -1303,7 +1304,43 @@ TEST_F(HloInstructionTest, StringifyGather) { "s64[10,9,8,7,5]{4,3,2,1,0} %gather_indices), " "output_window_dims={4,5,6,7,8}, elided_window_dims={}, " "gather_dims_to_operand_dims={0,1,2,3,4}, " - "window_bounds={30,29,28,27,26}"); + "index_vector_dim=4, window_bounds={30,29,28,27,26}"); +} + +TEST_F(HloInstructionTest, StringifyGather_1) { + Shape input_tensor_shape = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); + Shape gather_indices_tensor_shape = + ShapeUtil::MakeShape(S64, {10, 9, 5, 7, 6}); + Shape gather_result_shape = + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}); + + HloComputation::Builder builder("Gather"); + HloInstruction* input = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_tensor_shape, "input_tensor")); + HloInstruction* gather_indices = + builder.AddInstruction(HloInstruction::CreateParameter( + 1, 
gather_indices_tensor_shape, "gather_indices")); + + HloInstruction* gather_instruction = + builder.AddInstruction(HloInstruction::CreateGather( + gather_result_shape, input, gather_indices, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/2), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + HloModule module(TestName()); + module.AddEntryComputation(builder.Build()); + + EXPECT_EQ(gather_instruction->ToString(), + "%gather = f32[10,9,7,6,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} " + "gather(f32[50,49,48,47,46]{4,3,2,1,0} %input_tensor, " + "s64[10,9,5,7,6]{4,3,2,1,0} %gather_indices), " + "output_window_dims={4,5,6,7,8}, elided_window_dims={}, " + "gather_dims_to_operand_dims={0,1,2,3,4}, " + "index_vector_dim=2, window_bounds={30,29,28,27,26}"); } } // namespace diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index c9692757b2..607a672025 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -2467,27 +2467,27 @@ static Status ValidateGatherDimensionNumbers( const int64 output_window_dim_count = dim_numbers.output_window_dims_size(); const int64 output_shape_rank = - output_window_dim_count + gather_indices_shape.size(); + output_window_dim_count + gather_indices_shape.size() - 1; for (int i = 0; i < dim_numbers.output_window_dims_size(); ++i) { int64 window_index = dim_numbers.output_window_dims(i); if (window_index < 0 || window_index >= output_shape_rank) { return InvalidArgument( "Window index %d in gather op is out of bounds; got %lld, but should " - "have been in" - "[0,%lld)", + "have been in [0,%lld)", i, window_index, output_shape_rank); } } if (dim_numbers.gather_dims_to_operand_dims_size() != - gather_indices_shape.back()) { + gather_indices_shape[dim_numbers.index_vector_dim()]) { return 
InvalidArgument( - "There must be exactly as many elements in gather_dims_to_operand_dims " - "as there are elements in the last dimension of %%gather_indices; got: " - "%d, expected %lld", + "Gather op has %d elements in gather_dims_to_operand_dims and the " + "bound of dimension index_vector_dim=%lld of gather_indices is " + "%lld. These two numbers must be equal.", dim_numbers.gather_dims_to_operand_dims_size(), - gather_indices_shape.back()); + dim_numbers.index_vector_dim(), + gather_indices_shape[dim_numbers.index_vector_dim()]); } for (int i = 0; i < dim_numbers.gather_dims_to_operand_dims_size(); i++) { @@ -2550,24 +2550,33 @@ static Status ValidateGatherDimensionNumbers( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( gather_indices_shape, "gather indices operand of gather op")); - if (gather_indices_shape.dimensions_size() < 1) { + if (!ShapeUtil::ElementIsIntegral(gather_indices_shape)) { return InvalidArgument( - "Gather indices parameter must at least of rank 1; got %s", + "Gather indices parameter must be an integral tensor; got %s", ShapeUtil::HumanString(gather_indices_shape).c_str()); } - if (!ShapeUtil::ElementIsIntegral(gather_indices_shape)) { + // We implicitly reshape gather indices of shape P[A,B,C] to P[A,B,C,1] if + // index_vector_dim is rank(P). The bounds of this expanded shape is + // stored in expanded_gather_indices_shape. + + if (gather_indices_shape.dimensions_size() < + gather_dim_numbers.index_vector_dim() || + gather_dim_numbers.index_vector_dim() < 0) { return InvalidArgument( - "Gather indices parameter must be an integral tensor; got %s", - ShapeUtil::HumanString(gather_indices_shape).c_str()); + "Gather index leaf dimension must be within [0, rank(gather_indices) + " + "1). 
rank(gather_indices) is %d and gather index leaf dimension is " + "%lld.", + gather_indices_shape.dimensions_size(), + gather_dim_numbers.index_vector_dim()); } std::vector expanded_gather_indices_shape; - // We implicitly reshape gather indices of shape P[N] to P[N,1]. expanded_gather_indices_shape.reserve(gather_indices_shape.dimensions_size()); c_copy(gather_indices_shape.dimensions(), std::back_inserter(expanded_gather_indices_shape)); - if (expanded_gather_indices_shape.size() == 1) { + if (expanded_gather_indices_shape.size() == + gather_dim_numbers.index_vector_dim()) { expanded_gather_indices_shape.push_back(1); } @@ -2632,6 +2641,9 @@ static Status ValidateGatherDimensionNumbers( } current_bound = window_bounds[window_dims_seen++]; } else { + if (gather_dims_seen == gather_dim_numbers.index_vector_dim()) { + gather_dims_seen++; + } current_bound = expanded_gather_indices_shape[gather_dims_seen++]; } diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index 7eb120843f..029d2b3b86 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -1530,11 +1530,17 @@ TEST_F(ShapeInferenceTest, BadSlice) { class GatherShapeInferenceTest : public ShapeInferenceTest { protected: + const Shape s64_scalar_ = ShapeUtil::MakeShape(S64, {}); + const Shape s64_vector_5_ = ShapeUtil::MakeShape(S64, {5}); const Shape s64_vector_32_ = ShapeUtil::MakeShape(S64, {32}); const Shape s64_4d_tensor_10_9_8_7_1_ = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 1}); const Shape s64_4d_tensor_10_9_8_7_5_ = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 5}); + const Shape s64_4d_tensor_5_10_9_7_6_ = + ShapeUtil::MakeShape(S64, {5, 10, 9, 7, 6}); + const Shape s64_4d_tensor_10_9_5_7_6_ = + ShapeUtil::MakeShape(S64, {10, 9, 5, 7, 6}); const Shape f32_5d_tensor_50_49_48_47_46_ = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); const Shape tuple_shape_ = 
ShapeUtil::MakeTupleShape( @@ -1548,7 +1554,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGather) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/1), /*window_bounds=*/{64, 1})); EXPECT_TRUE( ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {64, 32}))) @@ -1562,7 +1569,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGatherV2) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{1}, /*elided_window_dims=*/{0}, - /*gather_dims_to_operand_dims=*/{0}), + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/1), /*window_bounds=*/{1, 48})); EXPECT_TRUE( ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {32, 48}))) @@ -1576,7 +1584,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGatherNd) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4}, /*elided_window_dims=*/{0}, - /*gather_dims_to_operand_dims=*/{0}), + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/4), /*window_bounds=*/{1, 48})); EXPECT_TRUE(ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {10, 9, 8, 7, 48}))) @@ -1591,7 +1600,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowBatchDynamicSlice) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26})); EXPECT_TRUE(ShapeUtil::Equal( gather_shape, @@ -1599,12 +1609,85 @@ TEST_F(GatherShapeInferenceTest, TensorFlowBatchDynamicSlice) { << ShapeUtil::HumanString(gather_shape); } +TEST_F(GatherShapeInferenceTest, NonDefaultGatherIndicesLeafDim_A) { + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_5_7_6_, + HloInstruction::MakeGatherDimNumbers( + 
/*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/2), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal( + gather_shape, + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, NonDefaultGatherIndicesLeafDim_B) { + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_5_10_9_7_6_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/0), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal( + gather_shape, + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, NoOutputGatherDims) { + // This is equivalent to a dynamic slice. + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_vector_5_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{0, 1, 2, 3, 4}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/0), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal(gather_shape, + ShapeUtil::MakeShape(F32, {30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, ScalarGatherIndices) { + // The gather indices "tensor" is a scalar S here that's used to slice out + // [S,0,0,0,0]..[S,30,29,28,27] into a [30,29,28,27] shaped result. 
+ TF_ASSERT_OK_AND_ASSIGN(Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_scalar_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{0, 1, 2, 3}, + /*elided_window_dims=*/{0}, + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/0), + /*window_bounds=*/{1, 30, 29, 28, 27})); + + EXPECT_TRUE(ShapeUtil::Equal(gather_shape, + ShapeUtil::MakeShape(F32, {30, 29, 28, 27}))) + << ShapeUtil::HumanString(gather_shape); +} + TEST_F(GatherShapeInferenceTest, TupleShapedTensorInput) { StatusOr statusor = ShapeInference::InferGatherShape( tuple_shape_, s64_vector_32_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/1), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1617,7 +1700,8 @@ TEST_F(GatherShapeInferenceTest, TupleShapedGatherIndicesInput) { s64_vector_32_, tuple_shape_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/0), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1625,25 +1709,13 @@ TEST_F(GatherShapeInferenceTest, TupleShapedGatherIndicesInput) { << statusor.status(); } -TEST_F(GatherShapeInferenceTest, ScalarGatherIndicesInput) { - StatusOr statusor = ShapeInference::InferGatherShape( - s64_vector_32_, s32_, - HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, - /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), - /*window_bounds=*/{64, 1}); - ASSERT_FALSE(statusor.ok()); - EXPECT_THAT(statusor.status().error_message(), - HasSubstr("Gather indices parameter must at least of rank 1")) - << statusor.status(); -} - TEST_F(GatherShapeInferenceTest, 
FloatingPointGatherIndicesInput) { StatusOr statusor = ShapeInference::InferGatherShape( s64_vector_32_, vector_32_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/0), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1658,7 +1730,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 8, 7}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1674,7 +1747,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 7}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1690,7 +1764,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 99, 100, 101}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1698,6 +1773,22 @@ TEST_F(GatherShapeInferenceTest, << statusor.status(); } +TEST_F(GatherShapeInferenceTest, + InvalidGatherDimNumbers_WindowIndexBarelyOutOfBounds) { + StatusOr statusor = ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_8_7_5_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 9}, + /*elided_window_dims=*/{}, + 
/*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), + /*window_bounds=*/{30, 29, 28, 27, 26}); + ASSERT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("Window index 4 in gather op is out of bounds")) + << statusor.status(); +} + TEST_F(GatherShapeInferenceTest, InvalidGatherDimNumbers_MismatchingElidedWindowDims) { StatusOr statusor = ShapeInference::InferGatherShape( @@ -1705,7 +1796,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{4}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1722,7 +1814,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{0, 1, 2, 3, 19}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1738,7 +1831,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{0, 1, 2, 3, 3}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1755,15 +1849,15 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( 
statusor.status().error_message(), - HasSubstr( - "There must be exactly as many elements in " - "gather_dims_to_operand_dims " - "as there are elements in the last dimension of %gather_indices")) + HasSubstr("Gather op has 4 elements in gather_dims_to_operand_dims and " + "the bound of dimension index_vector_dim=4 of " + "gather_indices is 5. These two numbers must be equal.")) << statusor.status(); } @@ -1774,7 +1868,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 7}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 7}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1791,7 +1886,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 3}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 3}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1808,7 +1904,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{2, 1}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{1, 1, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1822,7 +1919,8 @@ TEST_F(GatherShapeInferenceTest, InvalidGatherDimNumbers_WindowBoundsTooLarge) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7}, /*elided_window_dims=*/{2}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 1, 300, 26}); ASSERT_FALSE(statusor.ok()); 
EXPECT_THAT(statusor.status().error_message(), @@ -1838,7 +1936,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1855,7 +1954,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 26, 20}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1864,5 +1964,22 @@ TEST_F(GatherShapeInferenceTest, << statusor.status(); } +TEST_F(GatherShapeInferenceTest, OutOfBoundsGatherIndicesLeafDim) { + StatusOr statusor = ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_5_7_6_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/32), + /*window_bounds=*/{30, 29, 28, 27, 26}); + + ASSERT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("Gather index leaf dimension must be within [0, " + "rank(gather_indices) + 1)")) + << statusor.status(); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 28620c3b86..1f16e6d251 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -418,6 +418,10 @@ message GatherDimensionNumbers { // transforms the gather index looked up from the gather_indices tensor into // the starting index in the input space. 
repeated int64 gather_dims_to_operand_dims = 3; + + // The dimension in the gather_indices input that contains the starting + // indices. + int64 index_vector_dim = 4; } // Operation requests that are all collected as a tagged union with a oneof diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 1f7a3a1e2c..eaf6aeba3d 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -1050,6 +1050,9 @@ For a more intuitive description, see the "Informal Description" section below. : : : indices of the slices we're : : : : we're stitching together into : : : : the output tensor. : +|`index_vector_dim` | `int64` | The dimension in | +: : : `gather_indices` that contains : +: : : the starting indices. : |`output_window_dims` | `ArraySlice` | The set of dimensions in the | : : : output shape that are _window : : : : dimensions_ (defined below). : @@ -1066,22 +1069,20 @@ For a more intuitive description, see the "Informal Description" section below. : : : `output_window_dims`) and the window : : : : dimensions that are elided (via : : : : `elided_window_dims`). : -|`gather_dims_to_operand_dims` | `ArraySlice` | A dimension map (the | +|`gather_dims_to_operand_dims` | `ArraySlice` | A dimension map (the | : : : array is interpreted as mapping `i` to : : : : `gather_dims_to_operand_dims[i]`) from : : : : the gather indices in `gather_indices` to : : : : the operand index space. It has to be : : : : one-to-one and total. : -If `gather_indices` is a vector with `N` elements then we implicitly reshape it -to a tensor of shape `[N,1]` before proceeding. 
- For every index `Out` in the output tensor, we compute two things (more precisely described later): - - An index into the first `gather_indices.rank` - `1` dimensions of - `gather_indices`, which gives us a starting index of a slice, _operand - slice_, in the operand tensor. + - An index into `gather_indices.rank` - `1` dimensions of `gather_indices`, + which gives us a starting index of a slice, _operand slice_, in the operand + tensor. These `gather_indices.rank` - `1` dimensions are all the dimensions + in `gather_indices` except `index_vector_dim`. - A _window index_ that has the same rank as the operand. This index is composed of the values in `Out` at dimensions `output_window_dims`, embedded @@ -1093,29 +1094,42 @@ should be present in the output at index `Out`. The output is a tensor of rank `output_window_dims.size` + `gather_indices.rank` - `1`. Additionally, as a shorthand, we define `output_gather_dims` of type `ArraySlice` as the set of dimensions in the output shape but not in -`output_window_dims`, in ascending order. E.g. if the output tensor has rank 5, -`output_window_dims` is {`2`, `4`} then `output_gather_dims` is {`0`, `1`, `3`} +`output_window_dims`, in ascending order. E.g. if the output tensor has rank +`5`, `output_window_dims` is {`2`, `4`} then `output_gather_dims` is {`0`, `1`, +`3`} + +If `index_vector_dim` is equal to `gather_indices.rank` we implicitly +consider `gather_indices` to have a trailing `1` dimension (i.e. if +`gather_indices` was of shape `[6,7]` and `index_vector_dim` is `2` then +we implicitly consider the shape of `gather_indices` to be `[6,7,1]`). The bounds for the output tensor along dimension `i` is computed as follows: 1. If `i` is present in `output_gather_dims` (i.e. is equal to - `output_gather_dims[k]` for some `k`) then we pick the corresponding - dimension bounds out of `gather_indices.shape` (i.e. pick - `gather_indices.shape.dims[k]`). 
+ `output_gather_dims[k]` for some `k`) then we pick the corresponding + dimension bounds out of `gather_indices.shape`, skipping + `index_vector_dim` (i.e. pick `gather_indices.shape.dims`[`k`] if `k` + < `index_vector_dim` and `gather_indices.shape.dims`[`k`+`1`] + otherwise). 2. If `i` is present in `output_window_dims` (i.e. equal to - `output_window_dims[k]` for some `k`) then we pick the corresponding bound - out of `window_bounds` after accounting for `elided_window_dims` (i.e. we - pick `adjusted_window_bounds[k]` where `adjusted_window_bounds` is - `window_bounds` with the bounds at indices `elided_window_dims` removed). + `output_window_dims`[`k`] for some `k`) then we pick the corresponding + bound out of `window_bounds` after accounting for `elided_window_dims` + (i.e. we pick `adjusted_window_bounds`[`k`] where `adjusted_window_bounds` + is `window_bounds` with the bounds at indices `elided_window_dims` + removed). The operand index `In` corresponding to an output index `Out` is computed as follows: 1. Let `G` = { `Out`[`k`] for `k` in `output_gather_dims` }. Use `G` to slice - out vector `S` such that `S`[`i`] = `gather_indices`[`G`, `i`]. - 2. Create an index, `S``in`, into `operand` using `S` by scattering - `S` using the `gather_dims_to_operand_dims` map (`S``in` is the - starting indices for _operand slice_ mentioned above.). More precisely: + out vector `S` such that `S`[`i`] = `gather_indices`[Combine(`G`, `i`)] + where Combine(A, b) inserts b at position `index_vector_dim` into A. + Note that this is well defined even if `G` is empty -- if `G` is empty then + `S` = `gather_indices`. + 2. Create an index, `S``in`, into `operand` using `S` by + scattering `S` using the `gather_dims_to_operand_dims` map + (`S``in` is the starting indices for _operand slice_ mentioned + above). More precisely: 1. `S``in`[`gather_dims_to_operand_dims`[`k`]] = `S`[`k`] if `k` < `gather_dims_to_operand_dims.size`. 2. `S``in`[`_`] = `0` otherwise. 
@@ -1136,7 +1150,12 @@ follows: `operand.rank` is `6` and `elided_window_dims` is {`0`, `2`} then `window_dims_to_operand_dims` is {`0`→`1`, `1`→`3`, `2`→`4`, `3`→`5`}. -### Informal Description +### Informal Description and Examples + +`index_vector_dim` is set to `gather_indices.rank` - `1` in all of the +examples that follow. More interesting values for `index_vector_dim` +does not change the operation fundamentally, but makes the visual representation +more cumbersome. To get an intuition on how all of the above fits together, let's look at an example that gathers 5 slices of shape `[8,6]` from a `[16,11]` tensor. The -- GitLab From c6807e0c7c998f0e38e6930fca4a8cf667f791c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 10:24:08 -0800 Subject: [PATCH 003/311] Arithemtic optimization: Rewite Sub(0, y) => Neg(y) PiperOrigin-RevId: 187041872 --- .../grappler/optimizers/constant_folding.cc | 18 +++++++++++++++++- .../grappler/optimizers/constant_folding.h | 1 + .../optimizers/constant_folding_test.cc | 7 +++---- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 182e03f04e..10ca7dcce0 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1434,6 +1434,17 @@ void ConstantFolding::ReplaceDivisionOfOnesByReciprocal(NodeDef* node, graph_modified_ = true; } +void ConstantFolding::ReplaceSubtractionFromZeroByNegation(NodeDef* node, + GraphDef* graph) { + node->set_op("Neg"); + node->mutable_input()->SwapElements(0, 1); + const string ctrl_dep = + AddControlDependency(node->input(1), graph, node_map_.get()); + node_map_->UpdateInput(node->name(), node->input(1), ctrl_dep); + node->set_input(1, ctrl_dep); + graph_modified_ = true; +} + Status ConstantFolding::ReplaceOperationWithConstant( double value, const TensorShapeProto& shape, 
NodeDef* node, GraphDef* graph) { @@ -1636,12 +1647,17 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape); if (y_matches_output_shape && ((is_mul && x_is_one) || (is_add && x_is_zero))) { - // TODO(rmlarsen): Handle subtraction 0 - y. // 1 * y = y or 0 + y = y. ReplaceOperationWithSnapshot(1, node, output); continue; } + if (y_matches_output_shape && (is_sub && x_is_zero)) { + // Replace 0 - y with Neg(y). + ReplaceSubtractionFromZeroByNegation(node, output); + continue; + } + // Replace 1 / y with Reciprocal op. if (y_matches_output_shape && is_any_div && x_is_one) { DataType type = node->attr().at("T").type(); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 232b2f9fa0..2fd59c7f9c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -82,6 +82,7 @@ class ConstantFolding : public GraphOptimizer { GraphDef* graph); void ReplaceOperationWithSnapshot(int input_to_forward, NodeDef* node, GraphDef* graph); + void ReplaceSubtractionFromZeroByNegation(NodeDef* node, GraphDef* graph); Status ReplaceOperationWithConstant(double value, const TensorShapeProto& shape, NodeDef* node, GraphDef* graph); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 219f3bd5ec..c6540192d7 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -286,10 +286,9 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^zeros", node.input(1)); } else if (name == "sub2") { - // We don't handle this case yet. 
- EXPECT_EQ("Sub", node.op()); - EXPECT_EQ("zeros", node.input(0)); - EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("Neg", node.op()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("^zeros", node.input(1)); } const std::set square_zero_const{"mul1", "mul2", "mul5", "mul6", "matmul1", "matmul2"}; -- GitLab From 3ce1adbdf7b1f9a4a53d5438985d12b6526dbd14 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 10:24:56 -0800 Subject: [PATCH 004/311] Move accumulate_n_v2 to core. PiperOrigin-RevId: 187042001 --- tensorflow/contrib/framework/BUILD | 38 ------ .../framework/python/ops/accumulate_n_v2.py | 111 ------------------ tensorflow/python/kernel_tests/BUILD | 34 ++++++ .../kernel_tests/accumulate_n_eager_test.py} | 27 ++--- .../kernel_tests/accumulate_n_test.py} | 34 +++--- tensorflow/python/ops/math_ops.py | 81 ++++++------- 6 files changed, 99 insertions(+), 226 deletions(-) delete mode 100644 tensorflow/contrib/framework/python/ops/accumulate_n_v2.py rename tensorflow/{contrib/framework/python/ops/accumulate_n_v2_eager_test.py => python/kernel_tests/accumulate_n_eager_test.py} (72%) rename tensorflow/{contrib/framework/python/ops/accumulate_n_v2_test.py => python/kernel_tests/accumulate_n_test.py} (79%) diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index dbdb5cfaac..1accb319d2 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -28,7 +28,6 @@ tf_custom_op_py_library( "python/framework/graph_util.py", "python/framework/tensor_util.py", "python/ops/__init__.py", - "python/ops/accumulate_n_v2.py", "python/ops/arg_scope.py", "python/ops/audio_ops.py", "python/ops/checkpoint_ops.py", @@ -161,23 +160,6 @@ py_test( ], ) -py_test( - name = "accumulate_n_v2_test", - size = "small", - srcs = ["python/ops/accumulate_n_v2_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":framework_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", 
- "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:platform_test", - "//tensorflow/python:variables", - "//third_party/py/numpy", - ], -) - cuda_py_test( name = "critical_section_test", size = "medium", @@ -196,26 +178,6 @@ cuda_py_test( ], ) -py_test( - name = "accumulate_n_v2_eager_test", - size = "small", - srcs = ["python/ops/accumulate_n_v2_eager_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":framework_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python/eager:backprop", - "//tensorflow/python/eager:context", - "//tensorflow/python/eager:tape", - "//third_party/py/numpy", - ], -) - py_test( name = "ops_test", size = "small", diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py deleted file mode 100644 index 476528b0dd..0000000000 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Ops that will eventually be folded into tensorflow/python/ops/math_ops.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -from tensorflow.python.eager import context -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_math_ops -from tensorflow.python.ops import math_ops - - - -def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None): - """Returns the element-wise sum of a list of tensors. - - Optionally, pass `shape` and `tensor_dtype` for shape and type checking, - otherwise, these are inferred. - - `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not - wait for all of its inputs to be ready before beginning to sum. This can - save memory if inputs are ready at different times, since minimum temporary - storage is proportional to the output size rather than the inputs size. - - Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. - - For example: - - ```python - a = tf.constant([[1, 2], [3, 4]]) - b = tf.constant([[5, 0], [0, 6]]) - tf.accumulate_n_v2([a, b, a]) # [[7, 4], [6, 14]] - - # Explicitly pass shape and type - tf.accumulate_n_v2([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) - # [[7, 4], - # [6, 14]] - ``` - - Args: - inputs: A list of `Tensor` objects, each with same shape and type. - shape: Shape of elements of `inputs`. - tensor_dtype: The type of `inputs`. - name: A name for the operation (optional). - - Returns: - A `Tensor` of same shape and type as the elements of `inputs`. - - Raises: - ValueError: If `inputs` don't all have same shape and dtype or the shape - cannot be inferred. 
- """ - _INPUTS_ERR_MSG = ValueError("inputs must be a list of at least one Tensor" - "with the same dtype and shape") - if not inputs or not isinstance(inputs, (list, tuple)): - raise _INPUTS_ERR_MSG - inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) - if not all(isinstance(x, ops.Tensor) for x in inputs): - raise _INPUTS_ERR_MSG - if not all(x.dtype == inputs[0].dtype for x in inputs): - raise _INPUTS_ERR_MSG - if shape is not None: - shape = tensor_shape.as_shape(shape) - else: - shape = tensor_shape.unknown_shape() - for input_tensor in inputs: - if isinstance(input_tensor, ops.Tensor): - shape = shape.merge_with(input_tensor.get_shape()) - - # tensor_dtype is for safety only; operator's output type computed in C++ - if tensor_dtype is not None and tensor_dtype != inputs[0].dtype: - raise TypeError("tensor_dtype is {}, but input is of type {}" - .format(tensor_dtype, inputs[0].dtype)) - - if len(inputs) == 1 and name is None: - return inputs[0] - elif len(inputs) == 1 and name is not None: - return array_ops.identity(inputs[0], name=name) - elif context.in_eager_mode(): - # TemporaryVariable not currently supported in eager mode; fall back - # onto AddN for now. - # TODO(frreiss) remove this once the lifetime of eager variables gets - # addressed - return math_ops.add_n(inputs, name=name) - else: - return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) - -# The following code should eventually be merged into -# tensorflow/python/ops/math_grad.py -@ops.RegisterGradient("AccumulateNV2") -def _AddNGrad(op, grad): - """Same as gradient for AddN. Copies the gradient to all inputs.""" - # Not broadcasting. 
- return [grad] * len(op.inputs) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index d4ceb2e489..c9aa4a252d 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2892,6 +2892,40 @@ tf_py_test( ], ) +tf_py_test( + name = "accumulate_n_test", + size = "small", + srcs = ["accumulate_n_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:variables", + ], +) + +tf_py_test( + name = "accumulate_n_eager_test", + size = "small", + srcs = ["accumulate_n_eager_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:tape", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py b/tensorflow/python/kernel_tests/accumulate_n_eager_test.py similarity index 72% rename from tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py rename to tensorflow/python/kernel_tests/accumulate_n_eager_test.py index 35974b9e21..dc11b7dece 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_eager_test.py @@ -12,48 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for new version of accumulate_n op that will eventually go into -`ops.math_ops`. - -These test cases spefically exercise the `eager` APIs. They need to be in a -separate file from the remaining tests because eager mode is currently something -you can turn on but can't turn off for the lifetime of the current process.""" +"""Tests for new version of accumulate_n op.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np -from tensorflow.contrib.framework.python.ops import accumulate_n_v2 as av2 - from tensorflow.python.eager import backprop from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test - class AccumulateNV2EagerTest(test_util.TensorFlowTestCase): - """Tests of the new, differentiable version of accumulate_n""" + """Tests of the new, differentiable version of accumulate_n.""" def testMinimalEagerMode(self): forty = constant_op.constant(40) two = constant_op.constant(2) - answer = av2.accumulate_n_v2([forty, two]) + answer = math_ops.accumulate_n([forty, two]) self.assertEqual(42, answer.numpy()) - def testFloat(self): np.random.seed(12345) x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllClose(sum(x), av2.accumulate_n_v2(tf_x).numpy()) - self.assertAllClose(x[0] * 5, av2.accumulate_n_v2([tf_x[0]] * 5).numpy()) + self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).numpy()) + self.assertAllClose(x[0] * 5, + math_ops.accumulate_n([tf_x[0]] * 5).numpy()) def testGrad(self): np.random.seed(42) @@ -65,16 +58,14 @@ class 
AccumulateNV2EagerTest(test_util.TensorFlowTestCase): ] def fn(first, second, third): - return av2.accumulate_n_v2([first, second, third]) + return math_ops.accumulate_n([first, second, third]) grad_fn = backprop.gradients_function(fn) grad = grad_fn(input_vars[0], input_vars[1], input_vars[2]) - self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1 + self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1 [elem.numpy() for elem in grad]) - if __name__ == "__main__": ops.enable_eager_execution() test.main() - diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py b/tensorflow/python/kernel_tests/accumulate_n_test.py similarity index 79% rename from tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py rename to tensorflow/python/kernel_tests/accumulate_n_test.py index 45962098e9..0a6d4aea37 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_test.py @@ -12,42 +12,42 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for new version of accumulate_n op that will eventually go into -`ops.math_ops`.""" +"""Tests for new version of accumulate_n op.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np -from tensorflow.contrib.framework.python.ops import accumulate_n_v2 as av2 - from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import googletest class AccumulateNV2Test(test_util.TensorFlowTestCase): - """Tests of the new, differentiable version of accumulate_n""" + """Tests of the new, differentiable version of accumulate_n.""" def testFloat(self): np.random.seed(12345) x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllClose(sum(x), av2.accumulate_n_v2(tf_x).eval()) - self.assertAllClose(x[0] * 5, av2.accumulate_n_v2([tf_x[0]] * 5).eval()) + self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).eval()) + self.assertAllClose(x[0] * 5, + math_ops.accumulate_n([tf_x[0]] * 5).eval()) def testInt(self): np.random.seed(54321) x = [np.random.randint(-128, 128, (5, 4, 3, 2, 1)) for _ in range(6)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllEqual(sum(x), av2.accumulate_n_v2(tf_x).eval()) - self.assertAllEqual(x[0] * 6, av2.accumulate_n_v2([tf_x[0]] * 6).eval()) + self.assertAllEqual(sum(x), math_ops.accumulate_n(tf_x).eval()) + self.assertAllEqual(x[0] * 6, + math_ops.accumulate_n([tf_x[0]] * 6).eval()) def testGrad(self): np.random.seed(42) @@ -55,9 +55,9 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): with 
self.test_session(use_gpu=True) as sess: input_vars = [ variables.Variable(10.0 * np.random.random()) - for i in range(0, num_inputs) + for _ in range(0, num_inputs) ] - accum_n = av2.accumulate_n_v2(input_vars) + accum_n = math_ops.accumulate_n(input_vars) sess.run(variables.global_variables_initializer()) accum_n_grad = gradients.gradients(accum_n, input_vars) self.assertAllEqual( @@ -77,7 +77,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): ops.convert_to_tensor(x, dtype=dtypes_lib.float32) for x in random_arrays ] - tf_val = av2.accumulate_n_v2(random_tensors) + tf_val = math_ops.accumulate_n(random_tensors) np_val = random_arrays[0] for random_array in random_arrays[1:]: np_val += random_array @@ -86,7 +86,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): def testZeroArgs(self): with self.test_session(): with self.assertRaises(ValueError): - tf_val = av2.accumulate_n_v2([]) + tf_val = math_ops.accumulate_n([]) tf_val.eval() def testWrongShape(self): @@ -94,28 +94,28 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): a = variables.Variable(0.2) b = variables.Variable(0.1) - tf_val = av2.accumulate_n_v2([a, b], shape=[2, 2]) # Should be shape=[] + math_ops.accumulate_n([a, b], shape=[2, 2]) # Should be shape=[] def testIncompatibleShapes(self): with self.test_session(): with self.assertRaises(ValueError): a = variables.Variable(np.array([0.1, 0.2])) b = variables.Variable(np.array([[0.3], [0.4]])) - tf_val = av2.accumulate_n_v2([a, b]) + math_ops.accumulate_n([a, b]) def testWrongType(self): with self.test_session(): with self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) b = variables.Variable(0.1, dtype=np.float32) - tf_val = av2.accumulate_n_v2([a, b], tensor_dtype=np.int32) + math_ops.accumulate_n([a, b], tensor_dtype=np.int32) def testWrongTypeOneInput(self): # Scenario that used to trigger a bug, even when testWrongType() worked with self.test_session(): with 
self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) - tf_val = av2.accumulate_n_v2([a], tensor_dtype=np.int32) + math_ops.accumulate_n([a], tensor_dtype=np.int32) if __name__ == "__main__": diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index a09540028f..c3899c7e12 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -158,14 +158,11 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_control_flow_ops from tensorflow.python.ops import gen_data_flow_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gen_sparse_ops from tensorflow.python.ops import gen_spectral_ops -from tensorflow.python.ops import gen_state_ops -from tensorflow.python.ops import state_ops # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_math_ops import * @@ -2181,14 +2178,12 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): Optionally, pass `shape` and `tensor_dtype` for shape and type checking, otherwise, these are inferred. - NOTE: This operation is not differentiable and cannot be used if inputs depend - on trainable variables. Please use `tf.add_n` for such cases. + `tf.accumulate_n` performs the same operation as `tf.add_n`, but does not + wait for all of its inputs to be ready before beginning to sum. This can + save memory if inputs are ready at different times, since minimum temporary + storage is proportional to the output size rather than the inputs size. - Aside from differentiability, `tf.accumulate_n` performs the same operation as - `tf.add_n`, but does not wait for all of its inputs to be ready before - beginning to sum. 
This can save memory if inputs are ready at different times, - since minimum temporary storage is proportional to the output size rather than - the inputs size. + `accumulate_n` is differentiable (but wasn't previous to TensorFlow 1.7). For example: @@ -2198,8 +2193,9 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): tf.accumulate_n([a, b, a]) # [[7, 4], [6, 14]] # Explicitly pass shape and type - tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) # [[7, 4], - # [6, 14]] + tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) + # [[7, 4], + # [6, 14]] ``` Args: @@ -2215,20 +2211,17 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): ValueError: If `inputs` don't all have same shape and dtype or the shape cannot be inferred. """ - if context.in_eager_mode(): - # TODO(apassos) remove this once the lifetime of eager variables gets - # addressed. - raise ValueError("accumulate_n not supported in eager mode") + def _input_error(): + return ValueError( + "inputs must be a list of at least one Tensor with the " + "same dtype and shape") if not inputs or not isinstance(inputs, (list, tuple)): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) if not all(isinstance(x, ops.Tensor) for x in inputs): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() if not all(x.dtype == inputs[0].dtype for x in inputs): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() if shape is not None: shape = tensor_shape.as_shape(shape) else: @@ -2236,27 +2229,31 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): for input_tensor in inputs: if isinstance(input_tensor, ops.Tensor): shape = shape.merge_with(input_tensor.get_shape()) - if 
tensor_dtype is None: - tensor_dtype = inputs[0].dtype - if tensor_dtype != inputs[0].dtype: - raise TypeError("tensor_dtype is {}, but input is of type {}".format( - tensor_dtype, inputs[0].dtype)) - if len(inputs) == 1: + + # tensor_dtype is for safety only; operator's output type computed in C++ + if tensor_dtype is not None and tensor_dtype != inputs[0].dtype: + raise TypeError("tensor_dtype is {}, but input is of type {}" + .format(tensor_dtype, inputs[0].dtype)) + + if len(inputs) == 1 and name is None: return inputs[0] - with ops.name_scope(name, "AccumulateN", inputs) as name: - var = gen_state_ops._temporary_variable( - shape=tensor_shape.vector(0), dtype=tensor_dtype) - with ops.colocate_with(var): - zeros = array_ops.zeros_like(gen_control_flow_ops._merge(inputs)[0]) - zeros.set_shape(shape) - ref = state_ops.assign(var, zeros, validate_shape=False) - update_ops = [ - state_ops.assign_add(ref, input_tensor, use_locking=True) - for input_tensor in inputs - ] - with ops.control_dependencies(update_ops): - return gen_state_ops._destroy_temporary_variable( - ref, var_name=var.op.name, name=name) + elif len(inputs) == 1 and name is not None: + return array_ops.identity(inputs[0], name=name) + elif context.in_eager_mode(): + # TemporaryVariable not currently supported in eager mode; fall back + # onto AddN for now. + # TODO(frreiss) remove this once the lifetime of eager variables gets + # addressed + return add_n(inputs, name=name) + else: + return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) # pylint: disable=protected-access + + +@ops.RegisterGradient("AccumulateNV2") +def _accumulate_n_grad(op, grad): + """Same as gradient for AddN. Copies the gradient to all inputs.""" + # Not broadcasting. 
+ return [grad] * len(op.inputs) @tf_export("nn.sigmoid", "sigmoid") -- GitLab From 0b94d6270866789d210d1914e60937b6f231a669 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 26 Feb 2018 10:41:44 -0800 Subject: [PATCH 005/311] Deleting references to outdated `translate/seq2seq` tutorial. PiperOrigin-RevId: 187044697 --- tensorflow/tools/ci_build/builds/test_tutorials.sh | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/test_tutorials.sh b/tensorflow/tools/ci_build/builds/test_tutorials.sh index 67e5af5564..db335f14ca 100755 --- a/tensorflow/tools/ci_build/builds/test_tutorials.sh +++ b/tensorflow/tools/ci_build/builds/test_tutorials.sh @@ -277,17 +277,6 @@ test_ptb_word_lm() { fi } - -# ----------------------------------------------------------- -# translate_test -test_translate_test() { - LOG_FILE=$1 - - run_in_directory "${TEST_DIR}" "${LOG_FILE}" \ - "${TF_MODELS_DIR}/tutorials/rnn/translate/translate.py" --self_test=True -} - - # Run the tutorial tests test_runner "tutorial test-on-install" \ "${TUT_TESTS}" "${TF_BUILD_TUT_TEST_BLACKLIST}" "${LOGS_DIR}" -- GitLab From ca328de4d8805a7495485e787811484d843c43a2 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Mon, 26 Feb 2018 10:42:59 -0800 Subject: [PATCH 006/311] [XLA] Add kConvert to EffectiveOperandPrecisionIsOutputPrecision list. 
PiperOrigin-RevId: 187044921 --- tensorflow/compiler/xla/service/bfloat16_support.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/service/bfloat16_support.cc b/tensorflow/compiler/xla/service/bfloat16_support.cc index 3fd9e24601..07b4b14b5e 100644 --- a/tensorflow/compiler/xla/service/bfloat16_support.cc +++ b/tensorflow/compiler/xla/service/bfloat16_support.cc @@ -79,6 +79,7 @@ bool BFloat16Support::EffectiveOperandPrecisionIsOutputPrecision( case HloOpcode::kBroadcast: case HloOpcode::kClamp: case HloOpcode::kConcatenate: + case HloOpcode::kConvert: case HloOpcode::kCopy: case HloOpcode::kGetTupleElement: case HloOpcode::kMaximum: -- GitLab From 7735b2db761fba6e76c170066b2e5c3b7f10688b Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 26 Feb 2018 10:52:05 -0800 Subject: [PATCH 007/311] [XLA] Do not recompute flattened sets inside layout assignment. Cache the flattened sets instead of recomputing them. This matters for large graphs, since we may request the flattened set thousands of times on the same instruction, and it may be fairly expensive to construct for large tuples. 
PiperOrigin-RevId: 187046642 --- .../compiler/xla/service/layout_assignment.cc | 31 ++++++++++++++----- .../compiler/xla/service/layout_assignment.h | 10 ++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 0668f66051..4929300f7d 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -192,17 +192,34 @@ LayoutConstraints::LayoutConstraints( } } +PointsToSet::BufferSet* LayoutConstraints::GetBufferSet( + const HloInstruction* instruction) const { + auto it = buffer_sets_cache_.find(instruction); + if (it != buffer_sets_cache_.end()) { + return it->second.get(); + } + auto& buffer_set = + buffer_sets_cache_ + .emplace(instruction, MakeUnique()) + .first->second; + const auto& points_to_set = points_to_analysis_.GetPointsToSet(instruction); + points_to_set.ForEachElement( + [&buffer_set](const ShapeIndex& /*index*/, + const PointsToSet::BufferList& buffers) { + buffer_set->insert(buffers.begin(), buffers.end()); + }); + return buffer_set.get(); +} + bool LayoutConstraints::OperandBufferForwarded( const HloInstruction* instruction, int64 operand_no) const { // The operand is potentially forwarded if the intersection of points-to sets // of the operand and the instruction is non-empty. 
- auto output_buffers = - points_to_analysis_.GetPointsToSet(instruction).CreateFlattenedSet(); - auto operand_buffers = - points_to_analysis_.GetPointsToSet(instruction->operand(operand_no)) - .CreateFlattenedSet(); - for (const LogicalBuffer* output_buffer : output_buffers) { - if (operand_buffers.count(output_buffer) > 0) { + PointsToSet::BufferSet* output_buffers = GetBufferSet(instruction); + PointsToSet::BufferSet* operand_buffers = + GetBufferSet(instruction->operand(operand_no)); + for (const LogicalBuffer* output_buffer : *output_buffers) { + if (operand_buffers->count(output_buffer) > 0) { return true; } } diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index 2901858448..7126cb50cf 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -38,6 +38,7 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -199,6 +200,11 @@ class LayoutConstraints { string ToString() const; private: + // Find a bufferset in the bufferset cache. This is useful since we can + // currently create the flattened buffer set for the same instruction many + // times, which is often slow. + PointsToSet::BufferSet* GetBufferSet(const HloInstruction* instruction) const; + // The set of BufferLayoutConstraints applied to the computation. std::unordered_map buffer_constraints_; @@ -221,6 +227,10 @@ class LayoutConstraints { // Array-shaped buffers which have not yet been constrained. 
std::set unconstrained_buffer_ids_; + mutable tensorflow::gtl::FlatMap> + buffer_sets_cache_; + HloComputation* computation_; }; -- GitLab From 5a657b47f724b96730f764d3fb21c89e342e9c35 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 10:54:31 -0800 Subject: [PATCH 008/311] Integrate ClusterResolvers with TPUEstimator. PiperOrigin-RevId: 187047094 --- tensorflow/contrib/cluster_resolver/BUILD | 1 + .../python/training/cluster_resolver.py | 23 +- .../python/training/cluster_resolver_test.py | 2 + .../python/training/gce_cluster_resolver.py | 3 + .../python/training/tpu_cluster_resolver.py | 150 +++++++++--- .../training/tpu_cluster_resolver_test.py | 226 +++++++++++++----- .../contrib/tpu/python/tpu/tpu_config.py | 31 +++ 7 files changed, 345 insertions(+), 91 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/BUILD b/tensorflow/contrib/cluster_resolver/BUILD index 6b03df2b8e..1a124eca36 100644 --- a/tensorflow/contrib/cluster_resolver/BUILD +++ b/tensorflow/contrib/cluster_resolver/BUILD @@ -110,5 +110,6 @@ tf_py_test( "//tensorflow/python:platform_test", "//tensorflow/python:training", ], + grpc_enabled = True, main = "python/training/tpu_cluster_resolver_test.py", ) diff --git a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py index b04822fa9d..1c480b2513 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py @@ -53,11 +53,16 @@ class ClusterResolver(object): raise NotImplementedError( 'cluster_spec is not implemented for {}.'.format(self)) + @abc.abstractmethod + def master(self): + """...""" + raise NotImplementedError('master is not implemented for {}.'.format(self)) + class SimpleClusterResolver(ClusterResolver): """Simple implementation of ClusterResolver that accepts a ClusterSpec.""" - def __init__(self, cluster_spec): + def 
__init__(self, cluster_spec, master=''): """Creates a SimpleClusterResolver from a ClusterSpec.""" super(SimpleClusterResolver, self).__init__() @@ -65,10 +70,18 @@ class SimpleClusterResolver(ClusterResolver): raise TypeError('cluster_spec must be a ClusterSpec.') self._cluster_spec = cluster_spec + if not isinstance(master, str): + raise TypeError('master must be a string.') + self._master = master + def cluster_spec(self): """Returns the ClusterSpec passed into the constructor.""" return self._cluster_spec + def master(self): + """Returns the master address to use when creating a session.""" + return self._master + class UnionClusterResolver(ClusterResolver): """Performs a union on underlying ClusterResolvers. @@ -87,9 +100,13 @@ class UnionClusterResolver(ClusterResolver): Raises: TypeError: If any argument is not a subclass of `ClusterResolvers`. + ValueError: If there are no arguments passed. """ super(UnionClusterResolver, self).__init__() + if not args: + raise ValueError('At least one ClusterResolver is required.') + for cluster_resolver in args: if not isinstance(cluster_resolver, ClusterResolver): raise TypeError('All arguments must be a sub-class of ' @@ -169,3 +186,7 @@ class UnionClusterResolver(ClusterResolver): merged_cluster[job_name].update(task_dict) return ClusterSpec(merged_cluster) + + def master(self): + """master returns the master address from the first cluster resolver.""" + return self._cluster_resolvers[0].master() diff --git a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py index dbfb77723c..d9c97d53eb 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py @@ -234,5 +234,7 @@ class UnionClusterResolverTest(test.TestCase): self._verifyClusterSpecEquality(cluster_spec, expected_proto) +# TODO(saeta): Include tests 
for master resolution + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py index d6f2eced93..3f58241289 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py @@ -134,3 +134,6 @@ class GceClusterResolver(ClusterResolver): worker_list.sort() return ClusterSpec({self._job_name: worker_list}) + + def master(self): + return '' diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index a6a6e642e4..aeccf4c06b 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -23,7 +23,8 @@ from six.moves.urllib.request import Request from six.moves.urllib.request import urlopen from tensorflow.contrib.cluster_resolver.python.training.cluster_resolver import ClusterResolver -from tensorflow.python.training.server_lib import ClusterSpec +from tensorflow.python.training import server_lib +from tensorflow.python.util import compat _GOOGLE_API_CLIENT_INSTALLED = True try: @@ -46,13 +47,23 @@ class TPUClusterResolver(ClusterResolver): req = Request('http://metadata/computeMetadata/v1/%s' % path, headers={'Metadata-Flavor': 'Google'}) resp = urlopen(req) - return resp.read() + return compat.as_bytes(resp.read()) + + def _shouldResolve(self): + if (self._tpu == compat.as_bytes('') or + self._tpu == compat.as_bytes('local') or + self._tpu.startswith(compat.as_bytes('/bns')) or + self._tpu.startswith(compat.as_bytes('grpc://'))): + return False + return True def __init__(self, - tpu_names, + tpu, zone=None, project=None, - job_name='tpu_worker', + job_name='worker', + 
coordinator_name='coordinator', + coordinator_address=None, credentials='default', service=None): """Creates a new TPUClusterResolver object. @@ -61,7 +72,11 @@ class TPUClusterResolver(ClusterResolver): for the IP addresses and ports of each Cloud TPU listed. Args: - tpu_names: A list of names of the target Cloud TPUs. + tpu: Either a string, or a list of strings corresponding to the TPUs to + use. If the single string is the empty string, the string 'local', or a + string that begins with 'grpc://' or '/bns', then it is assumed to not + correspond with a Cloud TPU and will instead be passed as the session + master and no ClusterSpec propagation will be done. zone: Zone where the TPUs are located. If omitted or empty, we will assume that the zone of the TPU is the same as the zone of the GCE VM, which we will try to discover from the GCE metadata service. @@ -69,6 +84,12 @@ class TPUClusterResolver(ClusterResolver): empty, we will try to discover the project name of the GCE VM from the GCE metadata service. job_name: Name of the TensorFlow job the TPUs belong to. + coordinator_name: The name to use for the coordinator. Set to None if the + coordinator should not be included in the computed ClusterSpec. + coordinator_address: The address of the coordinator (typically an ip:port + pair). If set to None, a TF server will be started. If coordinator_name + is None, a TF server will not be started even if coordinator_address is + None. credentials: GCE Credentials. If None, then we use default credentials from the oauth2client service: The GCE API object returned by the googleapiclient.discovery @@ -77,26 +98,36 @@ class TPUClusterResolver(ClusterResolver): Raises: ImportError: If the googleapiclient is not installed. + ValueError: If no TPUs are specified. 
""" + if isinstance(tpu, list): + if not tpu: + raise ValueError('At least one TPU must be specified.') + if len(tpu) != 1: + raise NotImplementedError( + 'Using multiple TPUs in a single session is not yet implemented') + tpu = tpu[0] + self._tpu = compat.as_bytes(tpu) # self._tpu is always bytes + self._job_name = job_name + self._credentials = credentials - if not project: - project = self._requestComputeMetadata('/project/project-id') + should_resolve = self._shouldResolve() - if not zone: - zone_path = self._requestComputeMetadata('/instance/zone') + if not project and should_resolve: + project = self._requestComputeMetadata('project/project-id') + + if not zone and should_resolve: + zone_path = self._requestComputeMetadata('instance/zone') zone = zone_path.split('/')[-1] self._project = project self._zone = zone - self._tpu_names = tpu_names - self._job_name = job_name - self._credentials = credentials - if credentials == 'default': + if credentials == 'default' and should_resolve: if _GOOGLE_API_CLIENT_INSTALLED: self._credentials = GoogleCredentials.get_application_default() - if service is None: + if service is None and should_resolve: if not _GOOGLE_API_CLIENT_INSTALLED: raise ImportError('googleapiclient must be installed before using the ' 'TPU cluster resolver') @@ -107,25 +138,41 @@ class TPUClusterResolver(ClusterResolver): else: self._service = service - def get_master(self): - """Get the ClusterSpec grpc master path. + self._coordinator_name = coordinator_name + if coordinator_name and not coordinator_address and should_resolve: + self._start_local_server() + else: + self._coordinator_address = coordinator_address + + def master(self): + """Get the Master string to be used for the session. + + In the normal case, this returns the grpc path (grpc://1.2.3.4:8470) of + first instance in the ClusterSpec returned by the cluster_spec function. 
- This returns the grpc path (grpc://1.2.3.4:8470) of first instance in the - ClusterSpec returned by the cluster_spec function. This is suitable for use - for the `master` argument in tf.Session() when you are using one TPU. + If a non-TPU name is used when constructing a TPUClusterResolver, that will + be returned instead (e.g. If the tpus argument's value when constructing + this TPUClusterResolver was 'grpc://10.240.1.2:8470', + 'grpc://10.240.1.2:8470' will be returned). Returns: - string, the grpc path of the first instance in the ClusterSpec. + string, the connection string to use when creating a session. Raises: ValueError: If none of the TPUs specified exists. """ + if not self._shouldResolve(): + return self._tpu + job_tasks = self.cluster_spec().job_tasks(self._job_name) if not job_tasks: raise ValueError('No TPUs exists with the specified names exist.') return 'grpc://' + job_tasks[0] + def get_master(self): + return self.master() + def cluster_spec(self): """Returns a ClusterSpec object based on the latest TPU information. @@ -134,17 +181,54 @@ class TPUClusterResolver(ClusterResolver): Returns: A ClusterSpec containing host information returned from Cloud TPUs. - """ - worker_list = [] - - for tpu_name in self._tpu_names: - full_name = 'projects/%s/locations/%s/nodes/%s' % ( - self._project, self._zone, tpu_name) - request = self._service.projects().locations().nodes().get(name=full_name) - response = request.execute() - if 'health' in response and response['health'] == 'HEALTHY': - instance_url = '%s:%s' % (response['ipAddress'], response['port']) - worker_list.append(instance_url) - - return ClusterSpec({self._job_name: worker_list}) + Raises: + RuntimeError: If the provided TPU is not healthy. 
+ """ + if not self._shouldResolve(): + return server_lib.ClusterSpec({}) + + full_name = 'projects/%s/locations/%s/nodes/%s' % ( + self._project, self._zone, compat.as_text(self._tpu)) + request = self._service.projects().locations().nodes().get(name=full_name) + response = request.execute() + + if 'health' in response and response['health'] != 'HEALTHY': + raise RuntimeError('TPU "%s" is unhealthy: "%s"' % (self._tpu, + response['health'])) + + if 'networkEndpoints' in response: + worker_list = [ + '%s:%s' % (endpoint['ipAddress'], endpoint['port']) + for endpoint in response['networkEndpoints'] + ] + else: + # Fall back to the deprecated response format + instance_url = '%s:%s' % (response['ipAddress'], response['port']) + worker_list = [instance_url] + + cluster_spec = {self._job_name: worker_list} + + if self._coordinator_address: + cluster_spec[self._coordinator_name] = [self._coordinator_address] + + return server_lib.ClusterSpec(cluster_spec) + + def _start_local_server(self): + address = self._requestComputeMetadata('instance/network-interfaces/0/ip') + self._server = server_lib.Server( + { + 'local': ['0.0.0.0:0'] + }, protocol='grpc', config=None, start=True) + # self._server.target is of the form: grpc://ipaddress:port + target = compat.as_bytes(self._server.target) + splits = target.split(compat.as_bytes(':')) + assert len(splits) == 3, self._server.target + assert splits[0] == compat.as_bytes('grpc'), self._server.target + self._coordinator_port = compat.as_text(splits[2]) + self._coordinator_address = '%s:%s' % ( + address, compat.as_text(self._coordinator_port)) + + def __deepcopy__(self, memo): + # TODO(b/73668574): Remove this once RunConfig avoids performing deepcopy. 
+ return self diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index 4fd34629cf..6b4a155152 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -21,7 +21,7 @@ from __future__ import print_function from tensorflow.contrib.cluster_resolver.python.training.tpu_cluster_resolver import TPUClusterResolver from tensorflow.python.platform import test from tensorflow.python.training import server_lib - +from tensorflow.python.util import compat mock = test.mock @@ -50,10 +50,12 @@ class MockNodeClass(object): def mock_request_compute_metadata(cls, *args, **kwargs): del cls, kwargs # Unused. - if args[0] == '/project/project-id': + if args[0] == 'project/project-id': return 'test-project' - elif args[0] == '/instance/zone': + elif args[0] == 'instance/zone': return 'projects/test-project/locations/us-central1-c' + elif args[0] == 'instance/network-interfaces/0/ip': + return '10.128.1.2' return '' @@ -113,17 +115,26 @@ class TPUClusterResolverTest(test.TestCase): tpu_cluster_resolver = TPUClusterResolver( project=None, zone=None, - tpu_names=['test-tpu-1'], + tpu=['test-tpu-1'], credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.1.2.3:8470' } } - """ - self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) + job { + name: 'coordinator' + tasks { key: 0 value: '10.128.1.2:%s' } + } + job { + name: 'worker' + tasks { key: 0 value: '10.1.2.3:8470' } + } + """ % tpu_cluster_resolver._coordinator_port + self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto)) - def testSimpleSuccessfulRetrieval(self): + @mock.patch.object(TPUClusterResolver, 
'_requestComputeMetadata', + mock_request_compute_metadata) + def testRetrieveProjectAndZoneFromMetadataNoCoordinator(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { 'ipAddress': '10.1.2.3', @@ -133,116 +144,217 @@ class TPUClusterResolverTest(test.TestCase): } tpu_cluster_resolver = TPUClusterResolver( - project='test-project', - zone='us-central1-c', - tpu_names=['test-tpu-1'], + project=None, + zone=None, + tpu=['test-tpu-1'], + coordinator_name=None, credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.1.2.3:8470' } } + job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) - def testMultipleSuccessfulRetrieval(self): + def testSimpleSuccessfulRetrieval(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { 'ipAddress': '10.1.2.3', 'port': '8470', 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - 'health': 'HEALTHY' } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1'], + tpu=['test-tpu-1'], + coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.4.5.6:8470' } - tasks { key: 1 value: '10.1.2.3:8470' } } + job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } } + job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) - def testHealthyTpuNodeRetrieval(self): + def testNewNetworkEndpointFormat(self): tpu_map = { 
'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { - 'ipAddress': '10.1.2.3', - 'port': '8470', - 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-3': { - 'ipAddress': '10.7.8.9', - 'port': '8470', - 'health': 'UNHEALTHY' + 'health': 'HEALTHY', + 'networkEndpoints': [{ + 'ipAddress': '10.2.3.4', + 'port': 8470, + }] } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1', 'test-tpu-3'], + tpu='test-tpu-1', + coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { - name: 'tpu_worker' - tasks { - key: 0 - value: '10.1.2.3:8470' - } - } + job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } } + job { name: 'worker' tasks { key: 0 value: '10.2.3.4:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) + self.assertEqual('grpc://10.2.3.4:8470', tpu_cluster_resolver.master()) - def testGetMasterMultipleEntries(self): + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', + mock_request_compute_metadata) + def testPodResolution(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { - 'ipAddress': '10.1.2.3', - 'port': '8470', - 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - 'health': 'HEALTHY' + 'health': + 'HEALTHY', + 'networkEndpoints': [ + { + 'ipAddress': '10.2.3.4', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.5', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.6', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.7', + 'port': 8470, + }, + ] + } + } + + tpu_cluster_resolver = TPUClusterResolver( 
+ tpu='test-tpu-1', + credentials=None, + service=self.mock_service_client(tpu_map=tpu_map)) + + actual_cluster_spec = tpu_cluster_resolver.cluster_spec() + expected_proto = """ + job { + name: 'coordinator', + tasks { key: 0 value: '10.128.1.2:%s'} + } + job { + name: 'worker' + tasks { key: 0 value: '10.2.3.4:8470' } + tasks { key: 1 value: '10.2.3.5:8470' } + tasks { key: 2 value: '10.2.3.6:8470' } + tasks { key: 3 value: '10.2.3.7:8470' } + } + """ % tpu_cluster_resolver._coordinator_port + self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto)) + + def testPodResolutionNoCoordinator(self): + tpu_map = { + 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { + 'health': + 'HEALTHY', + 'networkEndpoints': [ + { + 'ipAddress': '10.2.3.4', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.5', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.6', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.7', + 'port': 8470, + }, + ] } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1'], + tpu='test-tpu-1', + coordinator_name=None, credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) - self.assertEqual('grpc://10.4.5.6:8470', tpu_cluster_resolver.get_master()) + + actual_cluster_spec = tpu_cluster_resolver.cluster_spec() + expected_proto = """ + job { + name: 'worker' + tasks { key: 0 value: '10.2.3.4:8470' } + tasks { key: 1 value: '10.2.3.5:8470' } + tasks { key: 2 value: '10.2.3.6:8470' } + tasks { key: 3 value: '10.2.3.7:8470' } + } + """ + self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) def testGetMasterNoEntries(self): tpu_map = {} + with self.assertRaises(ValueError): + TPUClusterResolver( + project='test-project', + zone='us-central1-c', + tpu=[], + coordinator_name=None, + credentials=None, + service=self.mock_service_client(tpu_map=tpu_map)) + + # TODO(saeta): Convert to parameterized test when included in OSS TF. 
+ def verifyShouldResolve(self, tpu, should_resolve): tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=[], + tpu=tpu, + coordinator_name=None, credentials=None, - service=self.mock_service_client(tpu_map=tpu_map)) - with self.assertRaises(ValueError): - tpu_cluster_resolver.get_master() + service=self.mock_service_client(tpu_map={})) + self.assertEqual(should_resolve, tpu_cluster_resolver._shouldResolve(), + "TPU: '%s'" % tpu) + + def testShouldResolveNoName(self): + self.verifyShouldResolve('', False) + + def testShouldResolveLocal(self): + self.verifyShouldResolve('local', False) + + def testShouldResolveGrpc(self): + self.verifyShouldResolve('grpc://10.1.2.3:8470', False) + + def testShouldResolveBns(self): + self.verifyShouldResolve('/bns/foo/bar', False) + + def testShouldResolveName(self): + self.verifyShouldResolve('mytpu', True) + + def testShouldResolveList(self): + self.verifyShouldResolve(['myothertpu'], True) + + def testShouldResolveGrpcPrefix(self): + self.verifyShouldResolve('grpctpu', True) + + def testNoCallComputeMetadata(self): + tpu_cluster_resolver = TPUClusterResolver(tpu='/bns/foo/bar') + self.assertEqual(compat.as_bytes('/bns/foo/bar'), + tpu_cluster_resolver.master()) + self.assertEqual( + server_lib.ClusterSpec({}), tpu_cluster_resolver.cluster_spec()) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 6440702182..7ceb4069cf 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -26,6 +26,7 @@ import os import numpy as np from tensorflow.contrib.tpu.python.tpu import util as util_lib +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.platform import tf_logging as logging @@ -140,6 +141,7 @@ class RunConfig(run_config_lib.RunConfig): 
tpu_config=None, evaluation_master=None, master=None, + cluster=None, **kwargs): """Constructs a RunConfig. @@ -148,15 +150,26 @@ class RunConfig(run_config_lib.RunConfig): evaluation_master: a string. The address of the master to use for eval. Defaults to master if not set. master: a string. The address of the master to use for training. + cluster: a ClusterResolver **kwargs: keyword config parameters. + + Raises: + ValueError: if cluster is not None and the provided session_config has a + cluster_def already. """ super(RunConfig, self).__init__(**kwargs) self._tpu_config = tpu_config or TPUConfig() + self._cluster = cluster # If user sets master and/or evaluation_master explicilty, including empty # string '', take it. Otherwise, take the values set by parent class. if master is not None: + if cluster is not None: + raise ValueError('Both master and cluster are set.') self._master = master + else: + if cluster: + self._master = cluster.master() if evaluation_master is not None: self._evaluation_master = evaluation_master @@ -170,6 +183,20 @@ class RunConfig(run_config_lib.RunConfig): # evaluation_master to master, unless user overwrites it. self._evaluation_master = self._master + # Set the ClusterSpec to use + if cluster: + self._cluster_spec = cluster.cluster_spec() + + # Merge the cluster_def into the ConfigProto. 
+ if self._session_config is None: # pylint: disable=access-member-before-definition + self._session_config = config_pb2.ConfigProto(allow_soft_placement=True) + if self._session_config.HasField('cluster_def'): + raise ValueError( + 'You cannot provide a ClusterResolver and ' + 'session_config.cluster_def.') + self._session_config.cluster_def.CopyFrom( + self._cluster_spec.as_cluster_def()) + @property def evaluation_master(self): return self._evaluation_master @@ -182,6 +209,10 @@ class RunConfig(run_config_lib.RunConfig): def tpu_config(self): return self._tpu_config + @property + def cluster(self): + return self._cluster + def replace(self, **kwargs): if 'tpu_config' not in kwargs: return super(RunConfig, self).replace(**kwargs) -- GitLab From 24c619b6c4dd38fc4ef0f51b92e5f16809cc4ec8 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Mon, 26 Feb 2018 10:59:54 -0800 Subject: [PATCH 009/311] Automated g4 rollback of changelist 185324160 PiperOrigin-RevId: 187048135 --- tensorflow/contrib/cmake/tf_core_cpu.cmake | 7 ++ tensorflow/contrib/makefile/Makefile | 1 + .../core/common_runtime/gpu/gpu_id_manager.cc | 50 +++++++-- .../core/common_runtime/gpu/gpu_id_manager.h | 14 ++- tensorflow/core/grappler/clusters/BUILD | 26 ++++- .../core/grappler/clusters/single_machine.cc | 17 ++- tensorflow/core/grappler/clusters/utils.cc | 71 ++++++++----- tensorflow/core/grappler/clusters/utils.h | 3 +- .../core/grappler/clusters/utils_test.cc | 100 ++++++++++++++++++ tensorflow/core/grappler/costs/BUILD | 1 + tensorflow/core/grappler/costs/utils.cc | 18 +++- 11 files changed, 262 insertions(+), 46 deletions(-) create mode 100644 tensorflow/core/grappler/clusters/utils_test.cc diff --git a/tensorflow/contrib/cmake/tf_core_cpu.cmake b/tensorflow/contrib/cmake/tf_core_cpu.cmake index 96ac60d095..a54cbff33b 100644 --- a/tensorflow/contrib/cmake/tf_core_cpu.cmake +++ b/tensorflow/contrib/cmake/tf_core_cpu.cmake @@ -63,6 +63,12 @@ file(GLOB_RECURSE tf_core_cpu_exclude_srcs 
"${tensorflow_source_dir}/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" "${tensorflow_source_dir}/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc" ) +file(GLOB_RECURSE tf_core_cpu_whitelisted_srcs + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id.h" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc" +) +list(REMOVE_ITEM tf_core_cpu_exclude_srcs ${tf_core_cpu_whitelisted_srcs}) list(REMOVE_ITEM tf_core_cpu_srcs ${tf_core_cpu_exclude_srcs}) if (tensorflow_ENABLE_GPU) @@ -79,6 +85,7 @@ if (tensorflow_ENABLE_GPU) "${tensorflow_source_dir}/tensorflow/core/*test*.cc" ) list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_gpu_exclude_srcs}) + list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_cpu_whitelisted_srcs}) list(APPEND tf_core_cpu_srcs ${tf_core_gpu_srcs}) endif() diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 81327407d4..05e8d9064b 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -677,6 +677,7 @@ endif # TEGRA TF_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) # Add in any extra files that don't fit the patterns easily TF_CC_SRCS += tensorflow/contrib/makefile/downloads/fft2d/fftsg.c +TF_CC_SRCS += tensorflow/core/common_runtime/gpu/gpu_id_manager.cc # Also include the op and kernel definitions. TF_CC_SRCS += $(shell cat $(MAKEFILE_DIR)/tf_op_files.txt) PBT_CC_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_pb_text_files.txt) diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc index 207afdca75..7dfff3269c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc @@ -18,7 +18,10 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" namespace tensorflow { @@ -27,8 +30,8 @@ namespace { class TfToCudaGpuIdMap { public: static TfToCudaGpuIdMap* singleton() { - static auto* manager = new TfToCudaGpuIdMap; - return manager; + static auto* id_map = new TfToCudaGpuIdMap; + return id_map; } void InsertOrDie(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id) @@ -47,18 +50,41 @@ class TfToCudaGpuIdMap { } } - int32 FindOrDie(TfGpuId tf_gpu_id) const LOCKS_EXCLUDED(mu_) { + CudaGpuId FindOrDie(TfGpuId tf_gpu_id) const LOCKS_EXCLUDED(mu_) { mutex_lock lock(mu_); + return FindOrDieLocked(tf_gpu_id); + } + + bool Find(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) const + LOCKS_EXCLUDED(mu_) { + mutex_lock lock(mu_); + if (id_map_.count(tf_gpu_id.value()) == 0) return false; + *cuda_gpu_id = FindOrDieLocked(tf_gpu_id); + return true; + } + + private: + TfToCudaGpuIdMap() = default; + + CudaGpuId FindOrDieLocked(TfGpuId tf_gpu_id) const + EXCLUSIVE_LOCKS_REQUIRED(mu_) { auto result = id_map_.find(tf_gpu_id.value()); CHECK(result != id_map_.end()) << "Could not find the mapping for TfGpuId: " << tf_gpu_id; - return result->second; + return CudaGpuId(result->second); + } + + void TestOnlyReset() LOCKS_EXCLUDED(mu_) { + mutex_lock lock(mu_); + id_map_.clear(); } - private: using IdMapType = std::unordered_map; mutable mutex mu_; IdMapType id_map_ GUARDED_BY(mu_); + + friend class ::tensorflow::GpuIdManager; + TF_DISALLOW_COPY_AND_ASSIGN(TfToCudaGpuIdMap); }; } // namespace @@ -67,8 +93,20 @@ void GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, TfToCudaGpuIdMap::singleton()->InsertOrDie(tf_gpu_id, cuda_gpu_id); } +Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) { + if 
(TfToCudaGpuIdMap::singleton()->Find(tf_gpu_id, cuda_gpu_id)) { + return Status::OK(); + } + return errors::NotFound("TF GPU device with id ", tf_gpu_id.value(), + " was not registered"); +} + CudaGpuId GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id) { - return CudaGpuId(TfToCudaGpuIdMap::singleton()->FindOrDie(tf_gpu_id)); + return TfToCudaGpuIdMap::singleton()->FindOrDie(tf_gpu_id); +} + +void GpuIdManager::TestOnlyReset() { + TfToCudaGpuIdMap::singleton()->TestOnlyReset(); } } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h index 33925d8c36..2b54cc184c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h @@ -17,15 +17,25 @@ limitations under the License. #define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_MANAGER_H_ #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/lib/core/status.h" namespace tensorflow { -// Class that manages the translation between Tensorflow GPU ids and CUDA GPU -// ids. +// Class that maintains a map from TfGpuId to CudaGpuId, and manages the +// translation between them. class GpuIdManager { public: + // Adds a mapping from tf_gpu_id to cuda_gpu_id. static void InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id); + + // Gets the cuda_gpu_id associated with tf_gpu_id. Returns OK if found. + static Status TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id); + // Similar to the above version, but returns the result, and checks fail if + // no result is found. static CudaGpuId TfToCudaGpuId(TfGpuId tf_gpu_id); + + // Clears the map. Used in unit tests only. 
+ static void TestOnlyReset(); }; } // namespace tensorflow diff --git a/tensorflow/core/grappler/clusters/BUILD b/tensorflow/core/grappler/clusters/BUILD index b8f8e13c9a..b653f902e8 100644 --- a/tensorflow/core/grappler/clusters/BUILD +++ b/tensorflow/core/grappler/clusters/BUILD @@ -1,7 +1,12 @@ licenses(["notice"]) # Apache 2.0 +load("//tensorflow:tensorflow.bzl", "if_cuda") load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") +load( + "//tensorflow/core:platform/default/build_config_root.bzl", + "tf_cuda_tests_tags", +) filegroup( name = "all_files", @@ -26,13 +31,12 @@ config_setting( tf_cuda_library( name = "utils", srcs = ["utils.cc"], - hdrs = [ - "utils.h", - ], + hdrs = ["utils.h"], visibility = ["//visibility:public"], deps = [ "//third_party/eigen3", "//tensorflow/core:framework", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", ] + select({ @@ -41,6 +45,21 @@ tf_cuda_library( }), ) +tf_cc_test( + name = "utils_test", + srcs = ["utils_test.cc"], + linkstatic = if_cuda(1, 0), + tags = tf_cuda_tests_tags(), + deps = [ + ":utils", + "//tensorflow/core:gpu_id", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + cc_library( name = "cluster", srcs = ["cluster.cc"], @@ -104,6 +123,7 @@ cc_library( "//tensorflow/core:core_cpu_lib", "//tensorflow/core:direct_session", "//tensorflow/core:framework", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core/grappler:utils", "//tensorflow/core/kernels:ops_util", diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index cc7f418d49..8e236c9ee8 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -21,6 +21,8 @@ limitations under the License. 
#include "tensorflow/cc/training/queue_runner.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/grappler/clusters/utils.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/kernels/ops_util.h" @@ -80,13 +82,24 @@ Status SingleMachine::Provision() { std::vector devices; TF_RETURN_IF_ERROR(session_->ListDevices(&devices)); - int gpu_id = 0; for (const auto& dev : devices) { DeviceProperties attr; if (dev.device_type() == "CPU") { attr = GetLocalCPUInfo(); } else if (dev.device_type() == "GPU") { - attr = GetLocalGPUInfo(gpu_id++); + DeviceNameUtils::ParsedName parsed; + if (!DeviceNameUtils::ParseFullName(dev.name(), &parsed)) { + return errors::InvalidArgument( + strings::StrCat("Not able to parse GPU device name: ", dev.name())); + } + TfGpuId tf_gpu_id(parsed.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + return errors::Unavailable("Unknown TF GPU device with id ", + tf_gpu_id.value(), ": ", s.ToString()); + } + attr = GetLocalGPUInfo(cuda_gpu_id); } else if (dev.device_type().find("XLA") == string::npos) { // Filter out the fake XLA devices to avoid double counting the actual // hardware resources that are available. diff --git a/tensorflow/core/grappler/clusters/utils.cc b/tensorflow/core/grappler/clusters/utils.cc index 607e10e1ab..b54b34959a 100644 --- a/tensorflow/core/grappler/clusters/utils.cc +++ b/tensorflow/core/grappler/clusters/utils.cc @@ -27,6 +27,9 @@ limitations under the License. 
#include "include/libxsmm.h" #endif +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/cpu_info.h" @@ -66,36 +69,40 @@ DeviceProperties GetLocalCPUInfo() { return device; } -DeviceProperties GetLocalGPUInfo(int gpu_id) { +DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id) { DeviceProperties device; device.set_type("GPU"); #if GOOGLE_CUDA cudaDeviceProp properties; - cudaError_t error = cudaGetDeviceProperties(&properties, gpu_id); - if (error == cudaSuccess) { - device.set_vendor("NVidia"); - device.set_model(properties.name); - device.set_frequency(properties.clockRate * 1e-3); - device.set_num_cores(properties.multiProcessorCount); - device.set_num_registers(properties.regsPerMultiprocessor); - // For compute capability less than 5, l1 cache size is configurable to - // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For - // compute capability larger or equal to 5, l1 cache (unified with texture - // cache) size is 24 KB. This number may need to be updated for future - // compute capabilities. - device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024); - device.set_l2_cache_size(properties.l2CacheSize); - device.set_l3_cache_size(0); - device.set_shared_memory_size_per_multiprocessor( - properties.sharedMemPerMultiprocessor); - device.set_memory_size(properties.totalGlobalMem); - // 8 is the number of bits per byte. 2 is accounted for - // double data rate (DDR). 
- device.set_bandwidth(properties.memoryBusWidth / 8 * - properties.memoryClockRate * 2); + cudaError_t error = cudaGetDeviceProperties(&properties, cuda_gpu_id.value()); + if (error != cudaSuccess) { + device.set_type("UNKNOWN"); + LOG(ERROR) << "Failed to get device properties, error code: " << error; + return device; } + device.set_vendor("NVIDIA"); + device.set_model(properties.name); + device.set_frequency(properties.clockRate * 1e-3); + device.set_num_cores(properties.multiProcessorCount); + device.set_num_registers(properties.regsPerMultiprocessor); + // For compute capability less than 5, l1 cache size is configurable to + // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For + // compute capability larger or equal to 5, l1 cache (unified with texture + // cache) size is 24 KB. This number may need to be updated for future + // compute capabilities. + device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024); + device.set_l2_cache_size(properties.l2CacheSize); + device.set_l3_cache_size(0); + device.set_shared_memory_size_per_multiprocessor( + properties.sharedMemPerMultiprocessor); + device.set_memory_size(properties.totalGlobalMem); + // 8 is the number of bits per byte. 2 is accounted for + // double data rate (DDR). 
+ device.set_bandwidth(properties.memoryBusWidth / 8 * + properties.memoryClockRate * 2); + (*device.mutable_environment())["architecture"] = strings::StrCat(properties.major, ".", properties.minor); (*device.mutable_environment())["cuda"] = strings::StrCat(CUDA_VERSION); @@ -106,18 +113,26 @@ DeviceProperties GetLocalGPUInfo(int gpu_id) { } DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) { + DeviceProperties unknown; + unknown.set_type("UNKNOWN"); + if (device.type == "CPU") { return GetLocalCPUInfo(); } else if (device.type == "GPU") { if (device.has_id) { - return GetLocalGPUInfo(device.id); + TfGpuId tf_gpu_id(device.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + LOG(ERROR) << s; + return unknown; + } + return GetLocalGPUInfo(cuda_gpu_id); } else { - return GetLocalGPUInfo(0); + return GetLocalGPUInfo(CudaGpuId(0)); } } - DeviceProperties result; - result.set_type("UNKNOWN"); - return result; + return unknown; } } // end namespace grappler diff --git a/tensorflow/core/grappler/clusters/utils.h b/tensorflow/core/grappler/clusters/utils.h index 191942040a..df8e7dca44 100644 --- a/tensorflow/core/grappler/clusters/utils.h +++ b/tensorflow/core/grappler/clusters/utils.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_GRAPPLER_CLUSTERS_UTILS_H_ #define TENSORFLOW_GRAPPLER_CLUSTERS_UTILS_H_ +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" #include "tensorflow/core/protobuf/device_properties.pb.h" #include "tensorflow/core/util/device_name_utils.h" @@ -27,7 +28,7 @@ DeviceProperties GetLocalCPUInfo(); // Returns the DeviceProperties for the specified GPU attached to the server on // which grappler is running. 
-DeviceProperties GetLocalGPUInfo(int gpu_id); +DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id); // Returns the DeviceProperties of the specified device DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device); diff --git a/tensorflow/core/grappler/clusters/utils_test.cc b/tensorflow/core/grappler/clusters/utils_test.cc new file mode 100644 index 0000000000..74218adbac --- /dev/null +++ b/tensorflow/core/grappler/clusters/utils_test.cc @@ -0,0 +1,100 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/clusters/utils.h" + +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/device_properties.pb.h" + +namespace tensorflow { +namespace grappler { +namespace { + +TEST(UtilsTest, GetLocalGPUInfo) { + GpuIdManager::TestOnlyReset(); +#if GOOGLE_CUDA + LOG(INFO) << "CUDA is enabled."; + DeviceProperties properties; + + // Invalid CUDA GPU ID. + properties = GetLocalGPUInfo(CudaGpuId(100)); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Succeed when a valid CUDA GPU id was inserted. 
+ properties = GetLocalGPUInfo(CudaGpuId(0)); + EXPECT_EQ("GPU", properties.type()); + EXPECT_EQ("NVIDIA", properties.vendor()); +#else + LOG(INFO) << "CUDA is not enabled."; + DeviceProperties properties; + + properties = GetLocalGPUInfo(CudaGpuId(0)); + EXPECT_EQ("GPU", properties.type()); + + properties = GetLocalGPUInfo(CudaGpuId(100)); + EXPECT_EQ("GPU", properties.type()); +#endif +} + +TEST(UtilsTest, GetDeviceInfo) { + GpuIdManager::TestOnlyReset(); + DeviceNameUtils::ParsedName device; + DeviceProperties properties; + + // Invalid type. + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Cpu info. + device.type = "CPU"; + properties = GetDeviceInfo(device); + EXPECT_EQ("CPU", properties.type()); + + // No TF GPU id provided. + device.type = "GPU"; + device.has_id = false; + properties = GetDeviceInfo(device); + EXPECT_EQ("GPU", properties.type()); +#if GOOGLE_CUDA + EXPECT_EQ("NVIDIA", properties.vendor()); +#endif + + // TF to CUDA GPU id mapping entry doesn't exist. + device.has_id = true; + device.id = 0; + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + +#if GOOGLE_CUDA + // Invalid CUDA GPU id. + GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(0), CudaGpuId(100)); + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Valid CUDA GPU id. 
+ GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(1), CudaGpuId(0)); + device.id = 1; + properties = GetDeviceInfo(device); + EXPECT_EQ("GPU", properties.type()); + EXPECT_EQ("NVIDIA", properties.vendor()); +#endif +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index 0fe01e9c9e..5336df1f51 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -142,6 +142,7 @@ tf_cuda_library( "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:graph", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index 602f69f12e..076945d5c6 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -26,6 +26,8 @@ limitations under the License. 
#include "cuda/include/cudnn.h" #endif +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/op.h" @@ -200,17 +202,25 @@ std::vector FindInputFeatures( } DeviceProperties GetDeviceInfo(const string& device_str) { + DeviceProperties unknown; + unknown.set_type("UNKNOWN"); + DeviceNameUtils::ParsedName parsed; if (DeviceNameUtils::ParseFullName(device_str, &parsed)) { if (parsed.type == "GPU") { - return GetLocalGPUInfo(parsed.id); + TfGpuId tf_gpu_id(parsed.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + LOG(ERROR) << s; + return unknown; + } + return GetLocalGPUInfo(cuda_gpu_id); } else if (parsed.type == "CPU") { return GetLocalCPUInfo(); } } - DeviceProperties device; - device.set_type("UNKNOWN"); - return device; + return unknown; } DeviceProperties GetDeviceInfo(const CostGraphDef::Node& node) { -- GitLab From 49b666dbbd58958a7499fa3961c1c8c75757ad7c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 11:08:54 -0800 Subject: [PATCH 010/311] Bring in `isbuiltin`. 
PiperOrigin-RevId: 187049824 --- tensorflow/python/util/tf_inspect.py | 5 +++++ tensorflow/python/util/tf_inspect_test.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index c2fe6fc449..a7cead5555 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -149,6 +149,11 @@ def getsource(object): # pylint: disable=redefined-builtin return _inspect.getsource(tf_decorator.unwrap(object)[1]) +def isbuiltin(object): # pylint: disable=redefined-builtin + """TFDecorator-aware replacement for inspect.isbuiltin.""" + return _inspect.isbuiltin(tf_decorator.unwrap(object)[1]) + + def isclass(object): # pylint: disable=redefined-builtin """TFDecorator-aware replacement for inspect.isclass.""" return _inspect.isclass(tf_decorator.unwrap(object)[1]) diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py index 8903e1156b..129408449e 100644 --- a/tensorflow/python/util/tf_inspect_test.py +++ b/tensorflow/python/util/tf_inspect_test.py @@ -144,6 +144,19 @@ def test_decorated_function_with_defaults(a, b=2, c='Hello'): self.assertEqual( expected, tf_inspect.getsource(test_decorated_function_with_defaults)) + def testIsBuiltin(self): + self.assertEqual( + tf_inspect.isbuiltin(TestDecoratedClass), + inspect.isbuiltin(TestDecoratedClass)) + self.assertEqual( + tf_inspect.isbuiltin(test_decorated_function), + inspect.isbuiltin(test_decorated_function)) + self.assertEqual( + tf_inspect.isbuiltin(test_undecorated_function), + inspect.isbuiltin(test_undecorated_function)) + self.assertEqual(tf_inspect.isbuiltin(range), inspect.isbuiltin(range)) + self.assertEqual(tf_inspect.isbuiltin(max), inspect.isbuiltin(max)) + def testIsClass(self): self.assertTrue(tf_inspect.isclass(TestDecoratedClass)) self.assertFalse(tf_inspect.isclass(test_decorated_function)) -- GitLab From 59e59b7b1065715e0e59ee134e769f625ec28edd Mon Sep 
17 00:00:00 2001 From: Asim Shankar Date: Mon, 26 Feb 2018 11:10:20 -0800 Subject: [PATCH 011/311] eager/examples/resnet50: Fix breakage. PiperOrigin-RevId: 187050075 --- .../contrib/eager/python/examples/resnet50/resnet50_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index c106ab0a06..65dcc53aab 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -194,11 +194,11 @@ class ResNet50Benchmarks(tf.test.Benchmark): with tf.device(device): images, _ = random_batch(batch_size) for _ in xrange(num_burn): - model(images).cpu() + model(images, training=False).cpu() gc.collect() start = time.time() for _ in xrange(num_iters): - model(images).cpu() + model(images, training=False).cpu() self._report(label, start, num_iters, device, batch_size, data_format) def benchmark_eager_apply(self): -- GitLab From 98f38b608073e761d75227373b2b2c7d26c483e5 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 11:12:04 -0800 Subject: [PATCH 012/311] Add support for parsing the "gather" HLO PiperOrigin-RevId: 187050345 --- .../compiler/xla/tools/parser/hlo_parser.cc | 37 +++++++++++++++++-- .../xla/tools/parser/hlo_parser_test.cc | 24 ++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index cd2b843ad3..e60a5a4919 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -1049,9 +1049,40 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloInstruction::CreateDot(shape, operands[0], operands[1], dnum)); break; } - case HloOpcode::kGather: - // TODO(b/72710576): HLO parsing is not implemented for Gather. 
- return TokenError("HLO parsing is not implemented for Gather"); + case HloOpcode::kGather: { + optional> output_window_dims; + attrs["output_window_dims"] = { + /*required=*/true, AttrTy::kBracedInt64List, &output_window_dims}; + optional> elided_window_dims; + attrs["elided_window_dims"] = { + /*required=*/true, AttrTy::kBracedInt64List, &elided_window_dims}; + optional> gather_dims_to_operand_dims; + attrs["gather_dims_to_operand_dims"] = {/*required=*/true, + AttrTy::kBracedInt64List, + &gather_dims_to_operand_dims}; + optional index_vector_dim; + attrs["index_vector_dim"] = {/*required=*/true, AttrTy::kInt64, + &index_vector_dim}; + optional> window_bounds; + attrs["window_bounds"] = {/*required=*/true, AttrTy::kBracedInt64List, + &window_bounds}; + + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + + GatherDimensionNumbers dim_numbers = HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/*output_window_dims, + /*elided_window_dims=*/*elided_window_dims, + /*gather_dims_to_operand_dims=*/*gather_dims_to_operand_dims, + /*index_vector_dim=*/*index_vector_dim); + + instruction = builder->AddInstruction(HloInstruction::CreateGather( + shape, /*operand=*/operands[0], /*gather_indices=*/operands[1], + dim_numbers, *window_bounds)); + break; + } case HloOpcode::kTrace: return TokenError(StrCat("parsing not yet implemented for op: ", HloOpcodeString(opcode))); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index b8c6b59204..863081d654 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -716,6 +716,18 @@ ENTRY %sparse_f32_r1 () -> f32[9] { ROOT %foo = f32[9]sparse{10} constant(f32[9]{1: 2, 3: 4, 5: 6}) } +)" +}, +{ +"gather", +R"(HloModule StringifyGather + +ENTRY %Gather (input_tensor: f32[50,49,48,47,46], gather_indices: s64[10,9,8,7,5]) -> 
f32[10,9,8,7,30,29,28,27,26] { + %input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0) + %gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1) + ROOT %gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(f32[50,49,48,47,46]{4,3,2,1,0} %input_tensor, s64[10,9,8,7,5]{4,3,2,1,0} %gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26} +} + )" }, }); @@ -860,6 +872,18 @@ ENTRY dot { ROOT dot = f32[2,3]{1,0} dot(a, b), lhs_batch_dims={0}, lhs_contracting_dims={1}, rhs_contracting_dims={0} } +)" +}, +{ +"gather", +R"(HloModule gather + +ENTRY Gather { + input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0) + gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1) + ROOT gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(input_tensor, gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26} +} + )" }, }); -- GitLab From b7b4fe66ee8adf936b1c2508a298c1e26a858af1 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Mon, 26 Feb 2018 11:13:09 -0800 Subject: [PATCH 013/311] Added const to Node* in various parts of the code base. 
PiperOrigin-RevId: 187050526 --- tensorflow/compiler/tf2xla/const_analysis.cc | 4 ++-- tensorflow/compiler/tf2xla/graph_compiler.cc | 2 +- .../core/common_runtime/shape_refiner.cc | 4 ++-- .../core/distributed_runtime/scheduler.cc | 18 +++++++++--------- .../core/distributed_runtime/scheduler.h | 6 +++--- tensorflow/core/graph/costmodel.cc | 2 +- tensorflow/core/graph/graph.cc | 2 +- tensorflow/core/graph/graph.h | 2 +- tensorflow/core/graph/graph_constructor.cc | 2 +- tensorflow/core/graph/graph_partition.cc | 6 +++--- tensorflow/core/graph/node_builder.cc | 6 +++--- tensorflow/core/graph/node_builder.h | 6 +++--- tensorflow/core/graph/optimizer_cse.cc | 16 ++++++++-------- 13 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc index 82923722c5..6f46532419 100644 --- a/tensorflow/compiler/tf2xla/const_analysis.cc +++ b/tensorflow/compiler/tf2xla/const_analysis.cc @@ -37,7 +37,7 @@ Status BackwardsConstAnalysis(const Graph& g, }; Status status; - std::unordered_set must_be_const; + std::unordered_set must_be_const; auto visit = [&status, &metadata_ops, &must_be_const, compile_time_const_args](Node* node) { if (!status.ok()) return; @@ -55,7 +55,7 @@ Status BackwardsConstAnalysis(const Graph& g, compile_time_const_args->at(index) = true; return; } - for (Node* pred : node->in_nodes()) { + for (const Node* pred : node->in_nodes()) { must_be_const.insert(pred); } return; diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc index 058a1f2621..b20c1ffc7d 100644 --- a/tensorflow/compiler/tf2xla/graph_compiler.cc +++ b/tensorflow/compiler/tf2xla/graph_compiler.cc @@ -130,7 +130,7 @@ Status GraphCompiler::Compile() { // Set up inputs from outputs of previous nodes. 
for (auto* e : n->in_edges()) { if (e->IsControlEdge()) continue; - Node* src = e->src(); + const Node* src = e->src(); TF_RET_CHECK(src->id() < output_registry.size()); const NodeOutputs& src_outputs = output_registry[src->id()]; diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 45cdab98e0..2acaa31d32 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -211,14 +211,14 @@ Status ShapeRefiner::AddNode(const Node* node) { // For each 'input' of this node, fetch the corresponding shape // from 'input's InferenceContext, and store into a vector // indexed by 'node's input. - std::vector input_nodes(node->num_inputs()); + std::vector input_nodes(node->num_inputs()); std::vector input_shapes(node->num_inputs()); std::vector>> input_handle_shapes_and_types(node->num_inputs()); for (const Edge* e : node->in_edges()) { if (e->IsControlEdge()) continue; - Node* input = e->src(); + const Node* input = e->src(); auto it = node_to_context_.find(input); if (it == node_to_context_.end()) { return errors::FailedPrecondition( diff --git a/tensorflow/core/distributed_runtime/scheduler.cc b/tensorflow/core/distributed_runtime/scheduler.cc index 9dae5b3b92..8403636197 100644 --- a/tensorflow/core/distributed_runtime/scheduler.cc +++ b/tensorflow/core/distributed_runtime/scheduler.cc @@ -80,7 +80,7 @@ Microseconds SlackAnalysis::ComputeAsap(std::vector* asap_times) { std::vector pending_count(graph_->num_node_ids()); InitializePending(graph_, &pending_count); - std::deque queue; + std::deque queue; Node* srcNode = graph_->source_node(); queue.push_back(srcNode); (*asap_times)[srcNode->id()] = 0; @@ -92,7 +92,7 @@ Microseconds SlackAnalysis::ComputeAsap(std::vector* asap_times) { for (const Edge* out_edge : curr->out_edges()) { // The time needed for 'out' to get its input from 'curr'. 
Microseconds copy_time(0); - Node* out = out_edge->dst(); + const Node* out = out_edge->dst(); if (!out_edge->IsControlEdge() && curr->assigned_device_name() != out->assigned_device_name()) { // Add an arbitrary 10microsecs for each copy. @@ -137,7 +137,7 @@ Microseconds SlackAnalysis::ComputeAlap(std::vector* alap_times) { } } - std::deque queue; + std::deque queue; Node* sinkNode = graph_->sink_node(); queue.push_back(sinkNode); (*alap_times)[sinkNode->id()] = 0; @@ -148,7 +148,7 @@ Microseconds SlackAnalysis::ComputeAlap(std::vector* alap_times) { for (const Edge* in_edge : curr->in_edges()) { // The time needed for 'curr' to get its input from 'src'. Microseconds copy_time(0); - Node* src = in_edge->src(); + const Node* src = in_edge->src(); if (!in_edge->IsControlEdge() && src->assigned_device_name() != curr->assigned_device_name()) { // TODO(yuanbyu): Use the real cost model @@ -236,7 +236,7 @@ Microseconds GreedyScheduler::ComputeSchedule( for (const Edge* out_edge : event.node->out_edges()) { Microseconds copy_time(0); - Node* out = out_edge->dst(); + const Node* out = out_edge->dst(); if (!out_edge->IsControlEdge() && event.node->assigned_device_name() != out->assigned_device_name()) { // TODO(yuanbyu): Use below with the real cost model. 
@@ -277,11 +277,11 @@ Microseconds GreedyScheduler::ComputeSchedule( return max_completion; } -Node* GreedyScheduler::GetNodeWithHighestPriority( - const std::vector& nodes) { - Node* curr_node = nullptr; +const Node* GreedyScheduler::GetNodeWithHighestPriority( + const std::vector& nodes) { + const Node* curr_node = nullptr; int64 curr_priority = kint64max; - for (Node* n : nodes) { + for (const Node* n : nodes) { if ((*priority_)[n->id()] < curr_priority) { curr_node = n; curr_priority = (*priority_)[n->id()]; diff --git a/tensorflow/core/distributed_runtime/scheduler.h b/tensorflow/core/distributed_runtime/scheduler.h index ef87b9834d..bf9d0d1bec 100644 --- a/tensorflow/core/distributed_runtime/scheduler.h +++ b/tensorflow/core/distributed_runtime/scheduler.h @@ -57,11 +57,11 @@ class GreedyScheduler { struct Sim { int degree_parallelism; int num_running; - std::vector ready_nodes; + std::vector ready_nodes; }; struct Event { - Node* node; + const Node* node; Microseconds time; bool is_completion; @@ -79,7 +79,7 @@ class GreedyScheduler { private: // Returns the ready node with the highest priority for a sim. 
- Node* GetNodeWithHighestPriority(const std::vector& nodes); + const Node* GetNodeWithHighestPriority(const std::vector& nodes); const DeviceSet* devices_; const CostModel* cost_model_; diff --git a/tensorflow/core/graph/costmodel.cc b/tensorflow/core/graph/costmodel.cc index 4f3a6ec38c..1df45d9b89 100644 --- a/tensorflow/core/graph/costmodel.cc +++ b/tensorflow/core/graph/costmodel.cc @@ -427,7 +427,7 @@ static void AssignSizes(const Graph& g, CostModel* cost_model) { if (e->IsControlEdge()) { continue; } - Node* src = e->src(); + const Node* src = e->src(); // TODO(josh11b): Get an estimate from the Op Bytes size(1); diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 9b56216f1f..a7af5e2312 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -339,7 +339,7 @@ Node* Graph::AddNode(const NodeDef& node_def, Status* status) { return node; } -Node* Graph::CopyNode(Node* node) { +Node* Graph::CopyNode(const Node* node) { DCHECK(!node->IsSource()); DCHECK(!node->IsSink()); Node* copy = AllocateNode(node->props_, node); diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 9d96cd4654..cbd58b051a 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -422,7 +422,7 @@ class Graph { // Copies *node, which may belong to another graph, to a new node, // which is returned. Does not copy any edges. *this owns the // returned instance. - Node* CopyNode(Node* node); + Node* CopyNode(const Node* node); // Removes a node from this graph, including all edges from or to it. // *node should not be accessed after calling this function. 
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 0629ff32d0..627309078a 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -1271,7 +1271,7 @@ void CopyGraph(const Graph& src, Graph* dest) { dest->set_versions(src.versions()); // Copy the nodes - std::unordered_map + std::unordered_map node_map; // "Node in src" -> "Node in *dest" node_map[src.source_node()] = dest->source_node(); node_map[src.sink_node()] = dest->sink_node(); diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc index add80eda23..17a174101b 100644 --- a/tensorflow/core/graph/graph_partition.cc +++ b/tensorflow/core/graph/graph_partition.cc @@ -123,8 +123,8 @@ bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { return false; } - Node* src = edge->src(); - Node* dst = edge->dst(); + const Node* src = edge->src(); + const Node* dst = edge->dst(); if (src->assigned_device_name() == dst->assigned_device_name()) { int src_port = edge->src_output(); int dst_port = edge->dst_input(); @@ -141,7 +141,7 @@ bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { // Return true iff (dst, dst_input) is specified on host memory. 
bool IsDstInputOnHost(const Edge* edge, const GraphInfo& info) { - Node* dst = edge->dst(); + const Node* dst = edge->dst(); int dst_port = edge->dst_input(); if (info.device_types[dst->id()] != DEVICE_CPU) { if (edge->IsControlEdge()) return false; diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc index 138952dcb3..114962c0e4 100644 --- a/tensorflow/core/graph/node_builder.cc +++ b/tensorflow/core/graph/node_builder.cc @@ -88,7 +88,7 @@ NodeBuilder& NodeBuilder::ControlInput(Node* src_node) { NodeBuilder& NodeBuilder::ControlInputs(gtl::ArraySlice src_nodes) { control_inputs_.insert(control_inputs_.end(), src_nodes.begin(), src_nodes.end()); - for (Node* src_node : src_nodes) { + for (const Node* src_node : src_nodes) { def_builder_.ControlInput(src_node->name()); } return *this; @@ -127,7 +127,7 @@ Status NodeBuilder::Finalize(Graph* graph, Node** created_node) const { return Status::OK(); } -void NodeBuilder::AddIndexError(Node* node, int i) { +void NodeBuilder::AddIndexError(const Node* node, int i) { if (node == nullptr) { errors_.emplace_back( strings::StrCat("Attempt to add nullptr Node to node with type ", @@ -140,7 +140,7 @@ void NodeBuilder::AddIndexError(Node* node, int i) { } } -bool NodeBuilder::GetOutputType(Node* node, int i, DataType* dt) { +bool NodeBuilder::GetOutputType(const Node* node, int i, DataType* dt) { bool error; *dt = SafeGetOutput(node, i, &error); if (error) AddIndexError(node, i); diff --git a/tensorflow/core/graph/node_builder.h b/tensorflow/core/graph/node_builder.h index 86647a49c1..f6b7b5674b 100644 --- a/tensorflow/core/graph/node_builder.h +++ b/tensorflow/core/graph/node_builder.h @@ -120,7 +120,7 @@ class NodeBuilder { const OpDef& op_def() const { return def_builder_.op_def(); } private: - static DataType SafeGetOutput(Node* node, int i, bool* error) { + static DataType SafeGetOutput(const Node* node, int i, bool* error) { if (node != nullptr && i >= 0 && i < node->num_outputs()) { 
*error = false; return node->output_type(i); @@ -131,11 +131,11 @@ class NodeBuilder { } // If SafeGetOutput indicates a range error, add it to errors_. - void AddIndexError(Node* node, int i); + void AddIndexError(const Node* node, int i); // Set *dt and returns true if i is in range. Combines // SafeGetOutput() and AddIndexError(). - bool GetOutputType(Node* node, int i, DataType* dt); + bool GetOutputType(const Node* node, int i, DataType* dt); NodeDefBuilder def_builder_; std::vector inputs_; diff --git a/tensorflow/core/graph/optimizer_cse.cc b/tensorflow/core/graph/optimizer_cse.cc index 6b452a1d5d..4073255db3 100644 --- a/tensorflow/core/graph/optimizer_cse.cc +++ b/tensorflow/core/graph/optimizer_cse.cc @@ -65,8 +65,8 @@ class OptimizerCSE { }; static void FillInputs(const Node* n, - gtl::InlinedVector* control_edges, - gtl::InlinedVector, 4>* in) { + gtl::InlinedVector* control_edges, + gtl::InlinedVector, 4>* in) { DCHECK_EQ(in->size(), n->num_inputs()); control_edges->clear(); for (const Edge* e : n->in_edges()) { @@ -96,8 +96,8 @@ size_t OptimizerCSE::NodeHash(const Node* n) { const int N_in = n->num_inputs(); strings::StrAppend(&str_to_hash, N_in); - gtl::InlinedVector control_edges; - gtl::InlinedVector, 4> in(N_in); + gtl::InlinedVector control_edges; + gtl::InlinedVector, 4> in(N_in); FillInputs(n, &control_edges, &in); for (const auto& edge : in) { strings::StrAppend(&str_to_hash, edge.first->id(), edge.second); @@ -147,10 +147,10 @@ bool OptimizerCSE::Equivalent(const Node* a, const Node* b, // Compare input sources if (a->num_inputs() != b->num_inputs()) return false; const int N_in = a->num_inputs(); - gtl::InlinedVector a_control_edges; - gtl::InlinedVector b_control_edges; - gtl::InlinedVector, 4> a_in(N_in); - gtl::InlinedVector, 4> b_in(N_in); + gtl::InlinedVector a_control_edges; + gtl::InlinedVector b_control_edges; + gtl::InlinedVector, 4> a_in(N_in); + gtl::InlinedVector, 4> b_in(N_in); FillInputs(a, &a_control_edges, &a_in); 
FillInputs(b, &b_control_edges, &b_in); if (a_in != b_in) return false; -- GitLab From e5b73fc9a8df0d87cb964ed49e946d2477c73e19 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Mon, 26 Feb 2018 11:22:43 -0800 Subject: [PATCH 014/311] TFLite: Ensures pointers to tensors won't be invalidated unless 16+ tensors are added. PiperOrigin-RevId: 187052100 --- tensorflow/contrib/lite/interpreter.cc | 13 +++---- tensorflow/contrib/lite/interpreter.h | 20 +++++++++++ tensorflow/contrib/lite/interpreter_test.cc | 40 +++++++++++++++++++++ 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 370e495527..0f5e17f0de 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -27,13 +27,6 @@ limitations under the License. #include "tensorflow/contrib/lite/nnapi_delegate.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" -namespace { - -// std::vector preallocation tuning. -constexpr const int kSlotsToReserve = 128; - -} // namespace - namespace tflite { // A trivial implementation of GraphInfo around the Interpreter. @@ -85,8 +78,8 @@ Interpreter::Interpreter(ErrorReporter* error_reporter) context_.GetExecutionPlan = nullptr; // Reserve some space for the tensors to avoid excessive resizing. 
- tensors_.reserve(kSlotsToReserve); - nodes_and_registration_.reserve(kSlotsToReserve); + tensors_.reserve(kTensorsReservedCapacity); + nodes_and_registration_.reserve(kTensorsReservedCapacity); next_execution_plan_index_to_prepare_ = 0; UseNNAPI(false); } @@ -353,6 +346,7 @@ TfLiteStatus Interpreter::PrepareOpsStartingAt( TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + EnsureTensorsVectorCapacity(); if (OpPrepare(registration, &node) == kTfLiteError) { return kTfLiteError; } @@ -430,6 +424,7 @@ TfLiteStatus Interpreter::Invoke() { TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + EnsureTensorsVectorCapacity(); if (OpInvoke(registration, &node) == kTfLiteError) { status = kTfLiteError; } diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index a9df2627e0..04c19644a0 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -265,6 +265,14 @@ class Interpreter { void set_model(const Model* model) { model_ = const_cast<Model*>(model); } Model* model() const { return model_; } + // The default capacity of `tensors_` vector. + static constexpr int kTensorsReservedCapacity = 128; + // The capacity headroom of `tensors_` vector before calling ops' + // `prepare` and `invoke` function. In these functions, it's guaranteed + // allocating up to `kTensorsCapacityHeadroom` more tensors won't invalidate + // pointers to existing tensors. + static constexpr int kTensorsCapacityHeadroom = 16; + private: // Give 'op_reg' a chance to initialize itself using the contents of // 'buffer'. @@ -377,6 +385,18 @@ class Interpreter { static TfLiteStatus GetExecutionPlan(struct TfLiteContext* context, TfLiteIntArray** execution_plan); + // Ensures that `tensors_` has at least `kTensorsCapacityHeadroom` extra + // capacity.
Calling this function may invalidate existing pointers to + // tensors. After calling this function, adding `kTensorsCapacityHeadroom` + // more tensors won't invalidate the pointer to existing tensors. + void EnsureTensorsVectorCapacity() { + const int required_capacity = tensors_size() + kTensorsCapacityHeadroom; + if (required_capacity > tensors_.capacity()) { + tensors_.reserve(required_capacity); + context_.tensors = tensors_.data(); + } + } + // A pure C data structure used to communicate with the pure C plugin // interface. To avoid copying tensor metadata, this is also the definitive // structure to store tensors. diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 28c96e5dde..2e6727b323 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -561,6 +561,46 @@ TEST(BasicInterpreter, TestCustomErrorReporter) { ASSERT_EQ(reporter.calls, 1); } +TEST(InterpreterTensorsCapacityTest, TestWithinHeadroom) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity), + kTfLiteOk); + TfLiteRegistration registration = {nullptr, nullptr, nullptr, nullptr}; + registration.prepare = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* first_tensor = context->tensors; + + int new_tensor_index; + context->AddTensors(context, Interpreter::kTensorsCapacityHeadroom, + &new_tensor_index); + EXPECT_EQ(first_tensor, context->tensors); + return kTfLiteOk; + }; + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + &registration), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); +} + +TEST(InterpreterTensorsCapacityTest, TestExceedHeadroom) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity), + kTfLiteOk); + TfLiteRegistration registration = {nullptr, nullptr, nullptr, nullptr}; + registration.prepare = [](TfLiteContext* context,
TfLiteNode* node) { + TfLiteTensor* first_tensor = context->tensors; + + int new_tensor_index; + context->AddTensors(context, Interpreter::kTensorsCapacityHeadroom + 1, + &new_tensor_index); + EXPECT_NE(first_tensor, context->tensors); + return kTfLiteOk; + }; + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + &registration), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); +} + // Test fixture that allows playing with execution plans. It creates a two // node graph that can be executed in either [0,1] order or [1,0] order. // The CopyOp records when it is invoked in the class member run_order_ -- GitLab From 0f8ee19ef830fc7d28ae611194bcd66f4383b038 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 11:43:14 -0800 Subject: [PATCH 015/311] Actually expose smart_cond and smart_constant_value in tf.contrib.framework Also moves these methods into their own file in python/framework. This avoids further bloating control_flow_ops.py and makes the BUILD deps easier for a future change I'm working on.
PiperOrigin-RevId: 187055501 --- tensorflow/contrib/framework/BUILD | 1 + tensorflow/contrib/framework/__init__.py | 7 +- tensorflow/python/BUILD | 26 ++++++ tensorflow/python/framework/smart_cond.py | 79 +++++++++++++++++++ .../python/framework/smart_cond_test.py | 66 ++++++++++++++++ tensorflow/python/layers/utils.py | 5 +- tensorflow/python/ops/control_flow_ops.py | 56 ------------- .../python/ops/control_flow_ops_test.py | 36 --------- 8 files changed, 180 insertions(+), 96 deletions(-) create mode 100644 tensorflow/python/framework/smart_cond.py create mode 100644 tensorflow/python/framework/smart_cond_test.py diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index 1accb319d2..50868c6d6c 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -63,6 +63,7 @@ tf_custom_op_py_library( "//tensorflow/python:platform", "//tensorflow/python:pywrap_tensorflow", "//tensorflow/python:script_ops", + "//tensorflow/python:smart_cond", "//tensorflow/python:sparse_tensor", "//tensorflow/python:state_ops", "//tensorflow/python:state_ops_gen", diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index deeb5bec79..8063250091 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -87,6 +87,9 @@ See the @{$python/contrib.framework} guide. 
@@get_placeholders +@@smart_cond +@@smart_constant_value + @@CriticalSection @@BoundedTensorSpec @@ -104,10 +107,10 @@ from tensorflow.contrib.framework.python.ops import * from tensorflow.python.framework.ops import prepend_name_scope from tensorflow.python.framework.ops import strip_name_scope +from tensorflow.python.framework.smart_cond import smart_cond +from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec -from tensorflow.python.ops.control_flow_ops import smart_cond -from tensorflow.python.ops.control_flow_ops import smart_constant_value from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['nest'] diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4c8c73548c..b0cb48c80c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -765,6 +765,31 @@ py_library( ], ) +py_library( + name = "smart_cond", + srcs = ["framework/smart_cond.py"], + srcs_version = "PY2AND3", + deps = [ + ":control_flow_ops", + ":tensor_util", + ], +) + +py_test( + name = "smart_cond_test", + size = "small", + srcs = ["framework/smart_cond_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":client_testlib", + ":constant_op", + ":framework_ops", + ":math_ops", + ":session", + ":smart_cond", + ], +) + py_library( name = "sparse_tensor", srcs = ["framework/sparse_tensor.py"], @@ -4091,6 +4116,7 @@ py_library( ":control_flow_ops", ":framework_for_generated_wrappers", ":platform", + ":smart_cond", ":tensor_util", ":util", ":variable_scope", diff --git a/tensorflow/python/framework/smart_cond.py b/tensorflow/python/framework/smart_cond.py new file mode 100644 index 0000000000..f97bb01f54 --- /dev/null +++ b/tensorflow/python/framework/smart_cond.py @@ -0,0 +1,79 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""smart_cond and related utilties.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import control_flow_ops + + +def smart_cond(pred, true_fn=None, false_fn=None, name=None): + """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. + + If `pred` is a bool or has a constant value, we return either `true_fn()` + or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. + + Arguments: + pred: A scalar determining whether to return the result of `true_fn` or + `false_fn`. + true_fn: The callable to be performed if pred is true. + false_fn: The callable to be performed if pred is false. + name: Optional name prefix when using `tf.cond`. + + Returns: + Tensors returned by the call to either `true_fn` or `false_fn`. + + Raises: + TypeError: If `true_fn` or `false_fn` is not callable. 
+ """ + if not callable(true_fn): + raise TypeError("`true_fn` must be callable.") + if not callable(false_fn): + raise TypeError("`false_fn` must be callable.") + + pred_value = smart_constant_value(pred) + if pred_value is not None: + if pred_value: + return true_fn() + else: + return false_fn() + else: + return control_flow_ops.cond(pred, true_fn=true_fn, false_fn=false_fn, + name=name) + + +def smart_constant_value(pred): + """Return the bool value for `pred`, or None if `pred` had a dynamic value. + + Arguments: + pred: A scalar, either a Python bool or tensor. + + Returns: + True or False if `pred` has a constant boolean value, None otherwise. + + Raises: + TypeError: If `pred` is not a Tensor or bool. + """ + if isinstance(pred, bool): + pred_value = pred + elif isinstance(pred, ops.Tensor): + pred_value = tensor_util.constant_value(pred) + else: + raise TypeError("`pred` must be a Tensor or a Python bool.") + return pred_value diff --git a/tensorflow/python/framework/smart_cond_test.py b/tensorflow/python/framework/smart_cond_test.py new file mode 100644 index 0000000000..b682506da0 --- /dev/null +++ b/tensorflow/python/framework/smart_cond_test.py @@ -0,0 +1,66 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.client import session +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond +from tensorflow.python.framework import test_util +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import googletest + + +@test_util.with_c_api +class SmartCondTest(test_util.TensorFlowTestCase): + + def testSmartCondTrue(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(2) + y = constant_op.constant(5) + z = smart_cond.smart_cond(True, lambda: math_ops.multiply(x, 16), + lambda: math_ops.multiply(y, 5)) + self.assertEqual(z.eval(), 32) + + def testSmartCondFalse(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(4) + y = constant_op.constant(3) + z = smart_cond.smart_cond(False, lambda: math_ops.multiply(x, 16), + lambda: math_ops.multiply(y, 3)) + self.assertEqual(z.eval(), 9) + + def testSmartCondMissingArg1(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(1) + with self.assertRaises(TypeError): + smart_cond.smart_cond(True, false_fn=lambda: x) + + def testSmartCondMissingArg2(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(1) + with self.assertRaises(TypeError): + smart_cond.smart_cond(True, lambda: x) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/layers/utils.py b/tensorflow/python/layers/utils.py index 484c6fc466..3b156c36a2 100644 --- a/tensorflow/python/layers/utils.py +++ b/tensorflow/python/layers/utils.py @@ -24,6 +24,7 @@ from tensorflow.python.eager import context from tensorflow.python.ops import variables from 
tensorflow.python.ops import control_flow_ops from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond as smart_module from tensorflow.python.framework import tensor_util from tensorflow.python.util import nest @@ -201,7 +202,7 @@ def smart_cond(pred, true_fn=None, false_fn=None, name=None): if isinstance(pred, variables.Variable): return control_flow_ops.cond( pred, true_fn=true_fn, false_fn=false_fn, name=name) - return control_flow_ops.smart_cond( + return smart_module.smart_cond( pred, true_fn=true_fn, false_fn=false_fn, name=name) @@ -228,7 +229,7 @@ def constant_value(pred): if isinstance(pred, variables.Variable): return None - return control_flow_ops.smart_constant_value(pred) + return smart_module.smart_constant_value(pred) def object_list_uid(object_list): diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index c78a5aa8c2..8d5ab72670 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -23,7 +23,6 @@ See the @{$python/control_flow_ops} guide. @@no_op @@count_up_to @@cond -@@smart_cond @@case @@while_loop @@logical_and @@ -2130,61 +2129,6 @@ def cond(pred, # pylint: enable=redefined-outer-name -def smart_cond(pred, true_fn=None, false_fn=None, name=None): - """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. - - If `pred` is a bool or has a constant value, we return either `true_fn()` - or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. - - Arguments: - pred: A scalar determining whether to return the result of `true_fn` or - `false_fn`. - true_fn: The callable to be performed if pred is true. - false_fn: The callable to be performed if pred is false. - name: Optional name prefix when using `tf.cond`. - - Returns: - Tensors returned by the call to either `true_fn` or `false_fn`. - - Raises: - TypeError: If `true_fn` or `false_fn` is not callable. 
- """ - if not callable(true_fn): - raise TypeError("`true_fn` must be callable.") - if not callable(false_fn): - raise TypeError("`false_fn` must be callable.") - - pred_value = smart_constant_value(pred) - if pred_value is not None: - if pred_value: - return true_fn() - else: - return false_fn() - else: - return cond(pred, true_fn=true_fn, false_fn=false_fn, name=name) - - -def smart_constant_value(pred): - """Return the bool value for `pred`, or None if `pred` had a dynamic value. - - Arguments: - pred: A scalar, either a Python bool or tensor. - - Returns: - True or False if `pred` has a constant boolean value, None otherwise. - - Raises: - TypeError: If `pred` is not a Tensor or bool. - """ - if isinstance(pred, bool): - pred_value = pred - elif isinstance(pred, ops.Tensor): - pred_value = tensor_util.constant_value(pred) - else: - raise TypeError("`pred` must be a Tensor or a Python bool.") - return pred_value - - def _resource_safe_shape(t): """Returns the shape of t or the variable it points to.""" if t.dtype == dtypes.resource: diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py index adc8c51e11..f22f3059d1 100644 --- a/tensorflow/python/ops/control_flow_ops_test.py +++ b/tensorflow/python/ops/control_flow_ops_test.py @@ -349,42 +349,6 @@ class SwitchTestCase(test_util.TensorFlowTestCase): self.assertEquals(grad_x_false.eval(), 0.) 
-@test_util.with_c_api -class SmartCondTest(test_util.TensorFlowTestCase): - - def testSmartCondTrue(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(2) - y = constant_op.constant(5) - z = control_flow_ops.smart_cond(True, lambda: math_ops.multiply(x, 16), - lambda: math_ops.multiply(y, 5)) - self.assertEqual(z.eval(), 32) - - def testSmartCondFalse(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(4) - y = constant_op.constant(3) - z = control_flow_ops.smart_cond(False, lambda: math_ops.multiply(x, 16), - lambda: math_ops.multiply(y, 3)) - self.assertEqual(z.eval(), 9) - - def testSmartCondMissingArg1(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(1) - with self.assertRaises(TypeError): - control_flow_ops.smart_cond(True, false_fn=lambda: x) - - def testSmartCondMissingArg2(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(1) - with self.assertRaises(TypeError): - control_flow_ops.smart_cond(True, lambda: x) - - @test_util.with_c_api class CondTest(test_util.TensorFlowTestCase): -- GitLab From 72eef4b7cf49956a3c675c6dc9d0488176a224cb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 11:50:49 -0800 Subject: [PATCH 016/311] Add the internal module name prefix to the white list. PiperOrigin-RevId: 187056701 --- tensorflow/contrib/py2tf/impl/config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/impl/config.py b/tensorflow/contrib/py2tf/impl/config.py index c90e85c96b..bdbc6663dd 100644 --- a/tensorflow/contrib/py2tf/impl/config.py +++ b/tensorflow/contrib/py2tf/impl/config.py @@ -31,12 +31,16 @@ PYTHON_LITERALS = { DEFAULT_UNCOMPILED_MODULES = set(( ('tensorflow',), (utils.__name__,), + + # All of tensorflow's subpackages. Unlike the root tf module, they don't + # have well-known names. 
Not refering to the module directly to avoid + # circular imports. + (utils.__name__[:-len('.contrib.py2tf.utils')],), )) NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',)) # TODO(mdan): Also allow controlling the generated names (for testability). -# TODO(mdan): Make sure copybara renames the reference below. COMPILED_IMPORT_STATEMENTS = ( 'from __future__ import print_function', 'import tensorflow as tf', -- GitLab From fd1a54b00b265a09d7026c05c074af6b8839e593 Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 26 Feb 2018 11:52:26 -0800 Subject: [PATCH 017/311] Internal change. PiperOrigin-RevId: 187056963 --- tensorflow/tools/api/tests/api_compatibility_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index c1e09cc531..2a784973e1 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -165,7 +165,7 @@ class ApiCompatibilityTest(test.TestCase): logging.error('%d differences found between API and golden.', diff_count) messages = verbose_diffs if verbose else diffs for i in range(diff_count): - logging.error('Issue %d\t: %s', i + 1, messages[i]) + print('Issue %d\t: %s' % (i + 1, messages[i]), file=sys.stderr) if update_goldens: # Write files if requested. -- GitLab From 16dbf4b8b08a587329900c71da5cb1bcab075b19 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Feb 2018 11:57:30 -0800 Subject: [PATCH 018/311] Use optimized ops to handle GPU memory swapping: this avoids the need for 2 pairs of extra _send/_recv nodes which speeds things up a bit. This also ensures that performance doesn't depend on the recv scheduling built in TF, which isn't always optimal. 
PiperOrigin-RevId: 187057831 --- tensorflow/core/grappler/optimizers/BUILD | 36 +++++++- .../optimizers/gpu_swapping_kernels.cc | 88 +++++++++++++++++++ .../grappler/optimizers/gpu_swapping_ops.cc | 58 ++++++++++++ .../grappler/optimizers/memory_optimizer.cc | 9 +- .../optimizers/memory_optimizer_test.cc | 65 +++++++++++--- tensorflow/core/grappler/utils/BUILD | 1 + .../core/grappler/utils/grappler_test.cc | 17 ++++ .../core/grappler/utils/grappler_test.h | 3 + 8 files changed, 258 insertions(+), 19 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc create mode 100644 tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 50ba48ea7a..908e58bcc7 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -1,6 +1,8 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") +load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") +load("//tensorflow:tensorflow.bzl", "tf_kernel_library") filegroup( name = "all_files", @@ -282,18 +284,48 @@ tf_cc_test( ], ) +tf_kernel_library( + name = "gpu_swapping_kernels", + srcs = [ + "gpu_swapping_kernels.cc", + ], + deps = [ + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "gpu_swapping_ops", + srcs = [ + "gpu_swapping_ops.cc", + ], + deps = [ + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], + alwayslink = 1, +) + cc_library( name = "memory_optimizer", - srcs = ["memory_optimizer.cc"], + srcs = [ + "memory_optimizer.cc", + ], hdrs = [ "memory_optimizer.h", ], visibility = ["//visibility:public"], deps = [ + ":gpu_swapping_kernels", + ":gpu_swapping_ops", ":graph_optimizer", ":graph_rewriter", ":static_schedule", "//tensorflow/core:framework", + "//tensorflow/core:lib", 
"//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", "//tensorflow/core/grappler:grappler_item", @@ -307,7 +339,7 @@ cc_library( ], ) -tf_cc_test( +tf_cc_test_gpu( name = "memory_optimizer_test", srcs = ["memory_optimizer_test.cc"], deps = [ diff --git a/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc b/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc new file mode 100644 index 0000000000..1820af6844 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc @@ -0,0 +1,88 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Op kernels used to swap data in and out of GPU memory. 
+ +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace { + +class CopyFromGpuToHostKernel : public AsyncOpKernel { + public: + explicit CopyFromGpuToHostKernel(OpKernelConstruction* context) + : AsyncOpKernel(context) {} + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override { + const Tensor& input = ctx->input(0); + OP_REQUIRES_ASYNC( + ctx, !ctx->input_alloc_attr(0).on_host(), + errors::Internal("The input tensor to the _CopyFromGpuToHost kernel " + "must reside on the device."), + done); + + AllocatorAttributes alloc_attrs; + alloc_attrs.set_gpu_compatible(true); + alloc_attrs.set_on_host(true); + Tensor* output; + OP_REQUIRES_OK_ASYNC( + ctx, ctx->allocate_output(0, input.shape(), &output, alloc_attrs), + done); + + ctx->op_device_context()->CopyDeviceTensorToCPU( + &input, "CopyFromGpuToHost", static_cast<Device*>(ctx->device()), + output, [ctx, done](const Status& s) { + ctx->SetStatus(s); + done(); + }); + } +}; + +REGISTER_KERNEL_BUILDER( + Name("_CopyFromGpuToHost").Device(DEVICE_GPU).HostMemory("output"), + CopyFromGpuToHostKernel); + +class CopyFromHostToGpuKernel : public AsyncOpKernel { + public: + explicit CopyFromHostToGpuKernel(OpKernelConstruction* context) + : AsyncOpKernel(context) {} + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override { + const Tensor& input = ctx->input(0); + OP_REQUIRES_ASYNC( + ctx, ctx->input_alloc_attr(0).on_host(), + errors::Internal("The input tensor to the _CopyFromHostToGpu kernel " + "must reside on the host."), + done); + + Tensor* output; + OP_REQUIRES_OK_ASYNC(ctx, ctx->allocate_output(0, input.shape(), &output), + done); + + ctx->op_device_context()->CopyCPUTensorToDevice( + &input, static_cast<Device*>(ctx->device()), output, + [ctx, done](const Status& s) { + ctx->SetStatus(s); + done(); + }); + } +}; + +REGISTER_KERNEL_BUILDER( +
Name("_CopyFromHostToGpu").Device(DEVICE_GPU).HostMemory("input"), + CopyFromHostToGpuKernel); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc b/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc new file mode 100644 index 0000000000..46828346da --- /dev/null +++ b/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Definition for the ops used to swap data in and out of GPU memory. + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace { + +// The _CopyFromGpuToHost op copies its input tensor to the host. The input must +// reside on GPU. The op itself must be placed on GPU. +REGISTER_OP("_CopyFromGpuToHost") + .Input("input: T") + .Output("output: T") + .Attr("T: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr) { + c->set_output_handle_shapes_and_types(0, *handle_data); + } + return Status::OK(); + }) + .Doc("Copies the input tensor from gpu to the host."); + +// The _CopyFromHostToGpu op copies its input tensor from the host to the GPU. 
+// The input must reside on CPU. The op itself must be placed on GPU. +REGISTER_OP("_CopyFromHostToGpu") + .Input("input: T") + .Output("output: T") + .Attr("T: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr) { + c->set_output_handle_shapes_and_types(0, *handle_data); + } + return Status::OK(); + }) + .Doc("Copies the input tensor from the host to the GPU."); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index dec4f04a1c..694139fa50 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -720,18 +720,19 @@ Status BuildSwapPair(NodeDef* node, int input_to_swap, // Force the tensor to be copied to cpu. NodeDef* swap_out_node = graph->add_node(); swap_out_node->set_name(swap_out_name); - swap_out_node->set_op("Identity"); - swap_out_node->set_device("/device:CPU:0"); + swap_out_node->set_op("_CopyFromGpuToHost"); // Force the tensor to be restored to the device. NodeDef* swap_in_node = graph->add_node(); swap_in_node->set_name(swap_in_name); - swap_in_node->set_op("Identity"); + swap_in_node->set_op("_CopyFromHostToGpu"); *swap_in_node->add_input() = swap_out_node->name(); - // Colocate the swap_in_ node with the node itself. + // Colocate the swap_out_ and swap_in_ nodes with the node itself. 
+ swap_out_node->set_device(node->device()); swap_in_node->set_device(node->device()); string coloc_group = strings::StrCat("loc@", tensor_to_swap); + (*swap_out_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); (*swap_in_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); (*node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc index 5d7913e0c0..9595936e9e 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc @@ -221,16 +221,20 @@ TEST_F(MemoryOptimizerTest, SimpleSwapping) { // Build a simple graph with an op that's marked for swapping. tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output a = ops::Variable(s.WithOpName("a"), {10, 10}, DT_FLOAT); - Output b = ops::AddN(s.WithOpName("b"), {a}); - Output c = ops::AddN(s.WithOpName("c"), {b}); - Output d = ops::AddN(s.WithOpName("d"), {c}); - Output e = ops::AddN(s.WithOpName("e"), {b, d}); + Output a = + ops::Variable(s.WithOpName("a").WithDevice("/gpu:0"), {10, 10}, DT_FLOAT); + Output b = ops::AddN(s.WithOpName("b").WithDevice("/gpu:0"), {a}); + Output c = ops::AddN(s.WithOpName("c").WithDevice("/gpu:0"), {b}); + Output d = ops::AddN(s.WithOpName("d").WithDevice("/gpu:0"), {c}); + Output e = ops::AddN(s.WithOpName("e").WithDevice("/gpu:0"), {b, d}); + + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {10, 10}); + Output init = ops::Assign(s.WithOpName("init"), a, constant); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - EXPECT_EQ(5, item.graph.node_size()); + EXPECT_EQ(7, item.graph.node_size()); EXPECT_EQ(NodeName(e.name()), item.graph.node(4).name()); AttrValue& val = (*item.graph.mutable_node(4)->mutable_attr())["_swap_to_host"]; @@ -243,32 +247,43 @@ TEST_F(MemoryOptimizerTest, SimpleSwapping) { Status 
status = optimizer.Optimize(cluster.get(), item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(7, output.node_size()); - const NodeDef& new_e = output.node(4); + EXPECT_EQ(9, output.node_size()); + const NodeDef& new_e = output.node(6); EXPECT_EQ(NodeName(e.name()), new_e.name()); EXPECT_EQ(2, new_e.input_size()); EXPECT_EQ(NodeName(d.name()), new_e.input(1)); EXPECT_EQ("swap_in_e_0", new_e.input(0)); - const NodeDef& swap_out = output.node(5); + const NodeDef& swap_out = output.node(7); EXPECT_EQ("swap_out_e_0", swap_out.name()); + EXPECT_EQ("_CopyFromGpuToHost", swap_out.op()); - const NodeDef& swap_in = output.node(6); + const NodeDef& swap_in = output.node(8); EXPECT_EQ("swap_in_e_0", swap_in.name()); + EXPECT_EQ("_CopyFromHostToGpu", swap_in.op()); EXPECT_EQ(NodeName(b.name()), swap_out.input(0)); EXPECT_EQ(NodeName(swap_out.name()), swap_in.input(0)); EXPECT_EQ("^c", swap_in.input(1)); - const NodeDef& new_c = output.node(2); + const NodeDef& new_c = output.node(4); EXPECT_EQ(NodeName(c.name()), new_c.name()); EXPECT_EQ("^swap_out_e_0", new_c.input(1)); // Run the optimizer a second time to ensure it's idempotent. 
- item.graph.Swap(&output); - status = optimizer.Optimize(cluster.get(), item, &output); + GrapplerItem item_copy(item, std::move(output)); + status = optimizer.Optimize(cluster.get(), item_copy, &output); TF_EXPECT_OK(status); + +#if GOOGLE_CUDA + item.fetch = {"e"}; + item.init_ops = {init.name()}; + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +#endif } TEST_F(MemoryOptimizerTest, SwappingHeuristics) { @@ -287,9 +302,13 @@ TEST_F(MemoryOptimizerTest, SwappingHeuristics) { Output h = ops::Exp(s.WithOpName("h").WithDevice("/gpu:0"), c); Output i = ops::Log(s.WithOpName("i").WithDevice("/gpu:0"), d); + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {128, 128, 8}); + Output init = ops::Assign(s.WithOpName("init"), v, constant); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"e", "f", "g", "h", "i"}; + item.init_ops = {init.name()}; std::unique_ptr cluster(CreateVirtualCluster()); @@ -308,6 +327,15 @@ TEST_F(MemoryOptimizerTest, SwappingHeuristics) { EXPECT_EQ("axis", node.input(4)); } } + +#if GOOGLE_CUDA + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + for (int i = 0; i < item.fetch.size(); ++i) { + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); + } +#endif } TEST_F(MemoryOptimizerTest, UnswappableInputs) { @@ -325,9 +353,13 @@ TEST_F(MemoryOptimizerTest, UnswappableInputs) { Output e = ops::Concat(s.WithOpName("e").WithDevice("/gpu:0"), {b, c, d}, axis); + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {128, 128, 8}); + Output init = ops::Assign(s.WithOpName("init"), v, constant); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"e"}; + item.init_ops = {init.name()}; std::unique_ptr 
cluster(CreateVirtualCluster()); @@ -344,6 +376,13 @@ TEST_F(MemoryOptimizerTest, UnswappableInputs) { EXPECT_EQ("^swap_out_d_2", node.input(4)); } } + +#if GOOGLE_CUDA + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +#endif } TEST_F(MemoryOptimizerTest, AccumulationRewrites) { diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index 0a9dbe22cf..5d32609434 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -142,6 +142,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", + "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", ], ) diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index fed46c05fb..fef8e97b6e 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -35,6 +35,23 @@ std::vector GrapplerTest::EvaluateNodes( return output_tensors; } +std::vector GrapplerTest::EvaluateFetchNodes(const GrapplerItem& item) { + SessionOptions options; + std::unique_ptr session(NewSession(options)); + TF_CHECK_OK(session->Create(item.graph)); + RunOptions run_options; + if (!item.init_ops.empty()) { + std::vector dummy; + TF_CHECK_OK( + session->Run(run_options, {}, {}, item.init_ops, &dummy, nullptr)); + } + std::vector output_tensors; + TF_CHECK_OK( + session->Run(run_options, {}, item.fetch, {}, &output_tensors, nullptr)); + TF_CHECK_OK(session->Close()); + return output_tensors; +} + void GrapplerTest::AddNode(const string& name, const string& op, const std::vector& inputs, GraphDef* graph) { auto* node = graph->add_node(); diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index 
042b616aa4..fd6809b6e2 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -30,6 +31,8 @@ class GrapplerTest : public ::testing::Test { std::vector EvaluateNodes(const GraphDef& graph, const std::vector& node_names); + std::vector EvaluateFetchNodes(const GrapplerItem& item); + void AddNode(const string& name, const string& op, const std::vector& inputs, GraphDef* graph); -- GitLab From 63d4c46a613c4d0e44d966c040bdfbbd0b16d13d Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 12:10:01 -0800 Subject: [PATCH 019/311] Fix bug calling gradients_function inside custom_gradient PiperOrigin-RevId: 187059871 --- tensorflow/python/eager/backprop_test.py | 13 +++++++++++++ tensorflow/python/eager/custom_gradient.py | 9 ++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 734558dee2..48fd170764 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -115,6 +115,19 @@ class BackpropTest(test.TestCase): with self.assertRaises(RuntimeError): backprop.gradients_function(f)(constant_op.constant(1.0)) + def testGradientsFunctionInCustomGradient(self): + + @custom_gradient.custom_gradient + def f(x): + (y,) = backprop.gradients_function(lambda x: x * x)(x) + + def grad(dy): + return [2 * dy] + + return y, grad + + self.assertAllEqual(f(1.0), 2.0) + def testImplicitGradOverEmbeddingLookup(self): batch_size = 8 embedding_size = 512 diff --git a/tensorflow/python/eager/custom_gradient.py b/tensorflow/python/eager/custom_gradient.py index 05460ff996..fb932a9372 100644 --- 
a/tensorflow/python/eager/custom_gradient.py +++ b/tensorflow/python/eager/custom_gradient.py @@ -71,11 +71,10 @@ def custom_gradient(f): input_tensors = [tf_ops.convert_to_tensor(x) for x in args] - with tape.stop_recording(): - result, grad_fn = f(*args, **kwargs) - flat_result = nest.flatten(result) - # TODO(apassos) consider removing the identity below. - flat_result = [gen_array_ops.identity(x) for x in flat_result] + result, grad_fn = f(*args, **kwargs) + flat_result = nest.flatten(result) + # TODO(apassos) consider removing the identity below. + flat_result = [gen_array_ops.identity(x) for x in flat_result] def actual_grad_fn(*outputs): return nest.flatten(grad_fn(*outputs)) -- GitLab From 1120deaf0bf5a51db5351c12b548994b35ba71c8 Mon Sep 17 00:00:00 2001 From: Jeremy Lau Date: Mon, 26 Feb 2018 12:23:36 -0800 Subject: [PATCH 020/311] Internal change. PiperOrigin-RevId: 187061863 --- tensorflow/contrib/bayesflow/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 08b29fb6bc..270c309ec3 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -210,7 +210,7 @@ cuda_py_test( cuda_py_test( name = "hmc_test", - size = "medium", + size = "large", srcs = ["python/kernel_tests/hmc_test.py"], additional_deps = [ ":bayesflow_py", -- GitLab From da492741630f62bfd4f8475fa532ef216f0d2bfd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 12:33:17 -0800 Subject: [PATCH 021/311] Maintain a cache of output dtypes of ops in TFE_Context. 
PiperOrigin-RevId: 187062992 --- tensorflow/c/eager/c_api.cc | 20 ++++++++++++++++++++ tensorflow/c/eager/runtime.cc | 15 ++++++++++++--- tensorflow/c/eager/runtime.h | 6 ++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index c27a7129fa..bebb63c746 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/rendezvous.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" @@ -823,6 +824,25 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, delete kernel; return; } + // Update output_dtypes inside `kernel`. + const tensorflow::OpDef* op_def = nullptr; + const tensorflow::FunctionDef* function_def = + ctx->func_lib_def.Find(ndef.op()); + if (function_def != nullptr) { + op_def = &(function_def->signature()); + } + if (op_def == nullptr) { + status->status = OpDefForOp(ndef.op().c_str(), &op_def); + if (!status->status.ok()) { + return; + } + } + tensorflow::DataTypeVector input_dtypes; + status->status = InOutTypesForNode(ndef, *op_def, &input_dtypes, + kernel->output_dtypes()); + if (!status->status.ok()) { + return; + } tensorflow::mutex_lock ml(ctx->cache_mu); tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel); } diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index f77a937f1f..4bf24fec2c 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -41,17 +41,26 @@ const uint32 kIsList = 1U << 31; } // namespace +Status OpDefForOp(const char* op_name, const OpDef** op_def) { + const OpRegistrationData* op_reg_data = nullptr; + 
Status s = OpRegistry::Global()->LookUp(op_name, &op_reg_data); + if (s.ok()) { + *op_def = &op_reg_data->op_def; + } + return s; +} + Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out) { mutex_lock l(g_op_name_to_attr_type_map_lock); *out = gtl::FindPtrOrNull(*OpNameToAttrTypeMap(), op_name); if (*out != nullptr) return Status::OK(); - const OpRegistrationData* op_reg_data = nullptr; - Status s = OpRegistry::Global()->LookUp(op_name, &op_reg_data); + const OpDef* op_def = nullptr; + Status s = OpDefForOp(op_name, &op_def); if (!s.ok()) return s; std::unique_ptr m(new AttrTypeMap); // TODO(agarwal): Avoid having to create this "registry" at runtime, // perhaps can be done at op registration time? - for (const auto& attr : op_reg_data->op_def.attr()) { + for (const auto& attr : op_def->attr()) { string type = attr.type(); const bool is_list = (type.length() > 6 && type.compare(0, 4, "list") == 0); if (is_list) { diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 4d20b5244a..7fede4dae9 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -39,6 +39,9 @@ namespace tensorflow { // represent the TF_AttrType type of the values in the list. typedef std::unordered_map AttrTypeMap; +// Look up OpDef for `op_name`. +Status OpDefForOp(const char* op_name, const OpDef** op_def); + // Returns the AttrTypeMap for the TensorFlow operation named op_name. 
Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out); @@ -180,12 +183,15 @@ class KernelAndDevice { const OpKernel* kernel() const { return kernel_.get(); } + DataTypeVector* output_dtypes() { return &output_dtypes_; } + private: std::unique_ptr kernel_; Device* device_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; + DataTypeVector output_dtypes_; }; } // namespace tensorflow -- GitLab From c7ea6ace71ed503a316cc5eb3dd087c5e7709725 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Mon, 26 Feb 2018 13:06:59 -0800 Subject: [PATCH 022/311] Include c_api_experimental in libtensorflow.so's dependencies. PiperOrigin-RevId: 187068103 --- tensorflow/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 2e71783b0d..a4e7602bea 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -779,6 +779,7 @@ tf_cc_shared_object( }), deps = [ "//tensorflow/c:c_api", + "//tensorflow/c:c_api_experimental", "//tensorflow/c:exported_symbols.lds", "//tensorflow/c:version_script.lds", "//tensorflow/c/eager:c_api", -- GitLab From ba2cc572f99b09ddd6a60e0557059cb1da51b356 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Mon, 26 Feb 2018 13:54:02 -0800 Subject: [PATCH 023/311] Update eager uniform replay buffer microbenchmarks to compare against graph functions when possible. 
PiperOrigin-RevId: 187075418 --- .../contrib/framework/python/ops/critical_section_ops.py | 6 ++++-- tensorflow/python/framework/ops.py | 9 ++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py index 3c5c55ed65..ab603cc18e 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py @@ -143,7 +143,7 @@ class CriticalSection(object): def _init_from_args(self, name, shared_name): # pylint: disable=invalid-name """Initialize the CriticalSection from constructor arguments.""" with ops.name_scope(name, "CriticalSection", []) as name: - with ops.control_dependencies(None): + with ops.init_scope(): # pylint: disable=protected-access container = ops.get_default_graph()._container # pylint: enable=protected-access @@ -226,7 +226,9 @@ class CriticalSection(object): # mode. This is generally ok; since eager mode (as of # writing) executes sequentially anyway. for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS): - if sg.handle.name == self._handle.name: + sg_handle_name = ops.convert_to_tensor(sg.handle).name + self_handle_name = ops.convert_to_tensor(self._handle).name + if sg_handle_name == self_handle_name: # Other executions in the same critical section are allowed. 
continue if not (exclusive_resource_access or sg.exclusive_resource_access): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 5a14ea4176..b0d2704c07 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -4805,7 +4805,14 @@ def container(container_name): @tf_export("colocate_with") def colocate_with(op, ignore_existing=False): if context.in_graph_mode(): - return get_default_graph().colocate_with(op, ignore_existing) + default_graph = get_default_graph() + if isinstance(op, EagerTensor): + if default_graph.building_function: + op = internal_convert_to_tensor(op) + else: + raise ValueError("Encountered an Eager-defined Tensor during graph " + "construction, but a function was not being built.") + return default_graph.colocate_with(op, ignore_existing) else: if op is not None: return device(op.device) -- GitLab From 7765066e6a686c7d6b1bed44248fafaa859db4eb Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 26 Feb 2018 14:00:07 -0800 Subject: [PATCH 024/311] TFTS: Switch to using core feature columns This fixes some shape issues that came up when using the tf.contrib.layers parsing functions. Adds a string -> embedding column API example to the LSTM example. 
PiperOrigin-RevId: 187076400 --- .../examples/data/multivariate_periods.csv | 200 +++++++++--------- .../timeseries/examples/known_anomaly.py | 8 +- .../contrib/timeseries/examples/lstm.py | 26 ++- .../python/timeseries/estimators.py | 53 +++-- .../timeseries/python/timeseries/model.py | 38 ++-- .../state_space_models/state_space_model.py | 10 +- 6 files changed, 177 insertions(+), 158 deletions(-) diff --git a/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv b/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv index b49a0662c2..9b15b4f0b2 100644 --- a/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv +++ b/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv @@ -1,100 +1,100 @@ -0,0.926906299771,1.99107237682,2.56546245685,3.07914768197,4.04839057867,1.,0. -1,0.108010001864,1.41645361423,2.1686839775,2.94963962176,4.1263503303,1.,0. -2,-0.800567600028,1.0172132907,1.96434754116,2.99885333086,4.04300485864,1.,0. -3,0.0607042871898,0.719540073421,1.9765012584,2.89265588817,4.0951014426,1.,0. -4,0.933712200629,0.28052120776,1.41018552514,2.69232603996,4.06481164223,1.,0. -5,-0.171730652974,0.260054421028,1.48770816369,2.62199129293,4.44572807842,1.,0. -6,-1.00180162933,0.333045158863,1.50006392277,2.88888309683,4.24755865606,1.,0. -7,0.0580061875336,0.688929398826,1.56543458772,2.99840358953,4.52726873347,1.,0. -8,0.764139447412,1.24704875327,1.77649279698,3.13578593851,4.63238922951,1.,0. -9,-0.230331874785,1.47903998963,2.03547545751,3.20624030377,4.77980005228,1.,0. -10,-1.03846045211,2.01133000781,2.31977503972,3.67951536251,5.09716775897,1.,0. -11,0.188643592253,2.23285349038,2.68338482249,3.49817168611,5.24928239634,1.,0. -12,0.91207302309,2.24244446841,2.71362604985,3.96332587625,5.37802271594,1.,0. -13,-0.296588665881,2.02594634141,3.07733910479,3.99698324956,5.56365901394,1.,0. -14,-0.959961476551,1.45078629833,3.18996420137,4.3763059609,5.65356015609,1.,0. 
-15,0.46313530679,1.01141441548,3.4980215948,4.20224896882,5.88842247449,1.,0. -16,0.929354125798,0.626635305936,3.70508262244,4.51791573544,5.73945973251,1.,0. -17,-0.519110731957,0.269249223148,3.39866823332,4.46802003061,5.82768174382,1.,0. -18,-0.924330981367,0.349602834684,3.21762413294,4.72803587499,5.94918925767,1.,0. -19,0.253239387885,0.345158023497,3.11071425333,4.79311566935,5.9489259713,1.,0. -20,0.637408390225,0.698996675371,3.25232492145,4.73814732384,5.9612010251,1.,0. -21,-0.407396859412,1.17456342803,2.49526823723,4.59323415742,5.82501686811,1.,0. -22,-0.967485452118,1.66655933642,2.47284606244,4.58316034754,5.88721406681,1.,0. -23,0.474480867904,1.95018556323,2.0228950072,4.48651142819,5.8255943735,1.,0. -24,1.04309652155,2.23519892356,1.91924131572,4.19094661783,5.87457348436,1.,0. -25,-0.517861513772,2.12501967336,1.70266619979,4.05280882887,5.72160912899,1.,0. -26,-0.945301585146,1.65464653549,1.81567174251,3.92309850635,5.58270493814,1.,0. -27,0.501153868974,1.40600764889,1.53991387719,3.72853247942,5.60169001727,1.,0. -28,0.972859524418,1.00344321868,1.5175642828,3.64092376655,5.10567722582,1.,0. -29,-0.70553406135,0.465306263885,1.7038540803,3.33236870312,5.09182481555,1.,0. -30,-0.946093634916,0.294539309453,1.88052827037,2.93011492669,4.97354922696,1.,0. -31,0.47922123231,0.308465865031,2.03445883031,2.90772899045,4.86241793548,1.,0. -32,0.754030014252,0.549752241167,2.46115815089,2.95063349534,4.71834614627,1.,0. -33,-0.64875949826,0.894615488148,2.5922463381,2.81269864022,4.43480095104,1.,0. -34,-0.757829951086,1.39123914261,2.69258079904,2.61834837315,4.36580046156,1.,0. -35,0.565653301088,1.72360022693,2.97794913834,2.80403840334,4.27327248459,1.,0. -36,0.867440092372,2.21100730052,3.38648090792,2.84057515729,4.12210169576,1.,0. -37,-0.894567758095,2.17549105818,3.45532493329,2.90446025717,4.00251740584,1.,0. -38,-0.715442356893,2.15105389965,3.52041791902,3.03650393392,4.12809249577,1.,0. 
-39,0.80671703672,1.81504564517,3.60463324866,3.00747789871,3.98440762467,1.,0. -40,0.527014790142,1.31803513865,3.43842186337,3.3332594663,4.03232406566,1.,0. -41,-0.795936862129,0.847809114454,3.09875133548,3.52863155938,3.94883924909,1.,0. -42,-0.610245806946,0.425530441018,2.92581949152,3.77238736123,4.27287245021,1.,0. -43,0.611662279431,0.178432049837,2.48128214822,3.73212087883,4.17319013831,1.,0. -44,0.650866553108,0.220341648392,2.41694642022,4.2609098519,4.27271645905,1.,0. -45,-0.774156982023,0.632667602331,2.05474356052,4.32889204886,4.18029723271,1.,0. -46,-0.714058448409,0.924562377599,1.75706135146,4.52492718422,4.3972678094,1.,0. -47,0.889627293379,1.46207968841,1.78299357672,4.64466731095,4.56317887554,1.,0. -48,0.520140662861,1.8996333843,1.41377633823,4.48899091177,4.78805049769,1.,0. -49,-1.03816935616,2.08997002059,1.51218375351,4.84167764204,4.93026048606,1.,0. -50,-0.40772951362,2.30878972136,1.44144415128,4.76854460997,5.01538444629,1.,0. -51,0.792730684781,1.91367048509,1.58887384677,4.71739397335,5.25690012199,1.,0. -52,0.371311881576,1.67565079528,1.81688563053,4.60353107555,5.44265822961,1.,0. -53,-0.814398070371,1.13374634126,1.80328814859,4.72264252878,5.52674761122,1.,0. -54,-0.469017949323,0.601244136627,2.29690896736,4.49859178859,5.54126153454,1.,0. -55,0.871044371426,0.407597593794,2.7499112487,4.19060637761,5.57693767301,1.,0. -56,0.523764933017,0.247705192709,3.09002071379,4.02095509006,5.80510362182,1.,0. -57,-0.881326403531,0.31513103164,3.11358205718,3.96079100808,5.81000652365,1.,0. -58,-0.357928025339,0.486163915865,3.17884556771,3.72634990659,5.85693642011,1.,0. -59,0.853038779822,1.04218094475,3.45835384454,3.36703969978,5.9585988449,1.,0. -60,0.435311516013,1.59715085283,3.63313338588,3.11276729421,5.93643818229,1.,0. -61,-1.02703719138,1.92205832542,3.47606111735,3.06247155999,6.02106646259,1.,0. -62,-0.246661325557,2.14653802542,3.29446326567,2.89936259181,5.67531541272,1.,0. 
-63,1.02554736569,2.25943737733,3.07031591528,2.78176218013,5.78206328989,1.,0. -64,0.337814475969,2.07589147224,2.80356226089,2.55888206331,5.7094075496,1.,0. -65,-1.12023369929,1.25333011618,2.56497288445,2.77361359194,5.50799418376,1.,0. -66,-0.178980246554,1.11937139901,2.51598681313,2.91438309151,5.47469577206,1.,0. -67,0.97550951531,0.60553823137,2.11657741073,2.88081098981,5.37034999502,1.,0. -68,0.136653357206,0.365828836075,1.97386033165,3.13217903204,5.07254490219,1.,0. -69,-1.05607596951,0.153152115069,1.52110743825,3.01308794192,5.08902539125,1.,0. -70,-0.13095280331,0.337113974483,1.52703079853,3.16687131599,4.86649398514,1.,0. -71,1.07081057754,0.714247566736,1.53761382634,3.45151989484,4.75892309166,1.,0. -72,0.0153410376082,1.24631231847,1.61690939161,3.85481994498,4.35683752832,1.,0. -73,-0.912801257303,1.60791309476,1.8729264524,4.03037260012,4.36072588913,1.,0. -74,-0.0894895640338,2.02535207407,1.93484909619,4.09557485132,4.35327025188,1.,0. -75,0.978646999652,2.20085086625,2.09003440427,4.27542353033,4.1805058388,1.,0. -76,-0.113312642876,2.2444100761,2.50789248839,4.4151861502,4.03267168136,1.,0. -77,-1.00215099149,1.84305628445,2.61691237246,4.45425147595,3.81203553766,1.,0. -78,-0.0183234614205,1.49573923116,2.99308471214,4.71134960112,4.0273804959,1.,0. -79,1.0823738177,1.12211589848,3.27079386925,4.94288270502,4.01851068083,1.,0. -80,0.124370187893,0.616474412808,3.4284236674,4.76942168327,3.9749536483,1.,0. -81,-0.929423379352,0.290977090976,3.34131726136,4.78590392707,4.10190661656,1.,0. -82,0.23766302648,0.155302052254,3.49779513794,4.64605656795,4.15571321107,1.,0. -83,1.03531486192,0.359702776204,3.4880725919,4.48167586667,4.21134561991,1.,0. -84,-0.261234571382,0.713877760378,3.42756426614,4.426443869,4.25208300527,1.,0. -85,-1.03572442277,1.25001113691,2.96908341113,4.25500915322,4.25723010649,1.,0. -86,0.380034261243,1.70543355622,2.73605932518,4.16703432307,4.63700400788,1.,0. 
-87,1.03734873488,1.97544410562,2.55586572141,3.84976673263,4.55282864289,1.,0. -88,-0.177344253372,2.22614526325,2.09565864891,3.77378097953,4.82577400298,1.,0. -89,-0.976821526892,2.18385079177,1.78522284118,3.67768223554,5.06302440873,1.,0. -90,0.264820472091,1.86981946157,1.50048403865,3.43619796921,5.05651761669,1.,0. -91,1.05642344868,1.47568646076,1.51347671977,3.20898518885,5.50149047462,1.,0. -92,-0.311607433358,1.04226467636,1.52089650905,3.02291865417,5.4889046232,1.,0. -93,-0.724285777937,0.553052311957,1.48573560173,2.7365973598,5.72549174225,1.,0. -94,0.519859192905,0.226520626591,1.61543723167,2.84102086852,5.69330622288,1.,0. -95,1.0323195039,0.260873217055,1.81913034804,2.83951143848,5.90325028086,1.,0. -96,-0.53285682538,0.387695521405,1.70935609313,2.57977050631,5.79579213161,1.,0. -97,-0.975127997215,0.920948771589,2.51292643636,2.71004616612,5.87016469227,1.,0. -98,0.540246804099,1.36445470181,2.61949412896,2.98482553485,6.02447664937,1.,0. -99,0.987764008058,1.85581989607,2.84685706149,2.94760204892,6.0212151724,1.,0. 
+0,0.926906299771,1.99107237682,2.56546245685,3.07914768197,4.04839057867,1.,0.,strkeya +1,0.108010001864,1.41645361423,2.1686839775,2.94963962176,4.1263503303,1.,0.,strkeyb +2,-0.800567600028,1.0172132907,1.96434754116,2.99885333086,4.04300485864,1.,0.,strkey +3,0.0607042871898,0.719540073421,1.9765012584,2.89265588817,4.0951014426,1.,0.,strkey +4,0.933712200629,0.28052120776,1.41018552514,2.69232603996,4.06481164223,1.,0.,strkey +5,-0.171730652974,0.260054421028,1.48770816369,2.62199129293,4.44572807842,1.,0.,strkey +6,-1.00180162933,0.333045158863,1.50006392277,2.88888309683,4.24755865606,1.,0.,strkey +7,0.0580061875336,0.688929398826,1.56543458772,2.99840358953,4.52726873347,1.,0.,strkey +8,0.764139447412,1.24704875327,1.77649279698,3.13578593851,4.63238922951,1.,0.,strkey +9,-0.230331874785,1.47903998963,2.03547545751,3.20624030377,4.77980005228,1.,0.,strkey +10,-1.03846045211,2.01133000781,2.31977503972,3.67951536251,5.09716775897,1.,0.,strkeyc +11,0.188643592253,2.23285349038,2.68338482249,3.49817168611,5.24928239634,1.,0.,strkey +12,0.91207302309,2.24244446841,2.71362604985,3.96332587625,5.37802271594,1.,0.,strkey +13,-0.296588665881,2.02594634141,3.07733910479,3.99698324956,5.56365901394,1.,0.,strkey +14,-0.959961476551,1.45078629833,3.18996420137,4.3763059609,5.65356015609,1.,0.,strkey +15,0.46313530679,1.01141441548,3.4980215948,4.20224896882,5.88842247449,1.,0.,strkey +16,0.929354125798,0.626635305936,3.70508262244,4.51791573544,5.73945973251,1.,0.,strkey +17,-0.519110731957,0.269249223148,3.39866823332,4.46802003061,5.82768174382,1.,0.,strkey +18,-0.924330981367,0.349602834684,3.21762413294,4.72803587499,5.94918925767,1.,0.,strkey +19,0.253239387885,0.345158023497,3.11071425333,4.79311566935,5.9489259713,1.,0.,strkey +20,0.637408390225,0.698996675371,3.25232492145,4.73814732384,5.9612010251,1.,0.,strkey +21,-0.407396859412,1.17456342803,2.49526823723,4.59323415742,5.82501686811,1.,0.,strkey 
+22,-0.967485452118,1.66655933642,2.47284606244,4.58316034754,5.88721406681,1.,0.,strkey +23,0.474480867904,1.95018556323,2.0228950072,4.48651142819,5.8255943735,1.,0.,strkey +24,1.04309652155,2.23519892356,1.91924131572,4.19094661783,5.87457348436,1.,0.,strkey +25,-0.517861513772,2.12501967336,1.70266619979,4.05280882887,5.72160912899,1.,0.,strkey +26,-0.945301585146,1.65464653549,1.81567174251,3.92309850635,5.58270493814,1.,0.,strkey +27,0.501153868974,1.40600764889,1.53991387719,3.72853247942,5.60169001727,1.,0.,strkey +28,0.972859524418,1.00344321868,1.5175642828,3.64092376655,5.10567722582,1.,0.,strkey +29,-0.70553406135,0.465306263885,1.7038540803,3.33236870312,5.09182481555,1.,0.,strkey +30,-0.946093634916,0.294539309453,1.88052827037,2.93011492669,4.97354922696,1.,0.,strkey +31,0.47922123231,0.308465865031,2.03445883031,2.90772899045,4.86241793548,1.,0.,strkey +32,0.754030014252,0.549752241167,2.46115815089,2.95063349534,4.71834614627,1.,0.,strkey +33,-0.64875949826,0.894615488148,2.5922463381,2.81269864022,4.43480095104,1.,0.,strkey +34,-0.757829951086,1.39123914261,2.69258079904,2.61834837315,4.36580046156,1.,0.,strkey +35,0.565653301088,1.72360022693,2.97794913834,2.80403840334,4.27327248459,1.,0.,strkey +36,0.867440092372,2.21100730052,3.38648090792,2.84057515729,4.12210169576,1.,0.,strkey +37,-0.894567758095,2.17549105818,3.45532493329,2.90446025717,4.00251740584,1.,0.,strkeyd +38,-0.715442356893,2.15105389965,3.52041791902,3.03650393392,4.12809249577,1.,0.,strkey +39,0.80671703672,1.81504564517,3.60463324866,3.00747789871,3.98440762467,1.,0.,strkey +40,0.527014790142,1.31803513865,3.43842186337,3.3332594663,4.03232406566,1.,0.,strkey +41,-0.795936862129,0.847809114454,3.09875133548,3.52863155938,3.94883924909,1.,0.,strkey +42,-0.610245806946,0.425530441018,2.92581949152,3.77238736123,4.27287245021,1.,0.,strkey +43,0.611662279431,0.178432049837,2.48128214822,3.73212087883,4.17319013831,1.,0.,strkey 
+44,0.650866553108,0.220341648392,2.41694642022,4.2609098519,4.27271645905,1.,0.,strkey +45,-0.774156982023,0.632667602331,2.05474356052,4.32889204886,4.18029723271,1.,0.,strkey +46,-0.714058448409,0.924562377599,1.75706135146,4.52492718422,4.3972678094,1.,0.,strkey +47,0.889627293379,1.46207968841,1.78299357672,4.64466731095,4.56317887554,1.,0.,strkey +48,0.520140662861,1.8996333843,1.41377633823,4.48899091177,4.78805049769,1.,0.,strkey +49,-1.03816935616,2.08997002059,1.51218375351,4.84167764204,4.93026048606,1.,0.,strkey +50,-0.40772951362,2.30878972136,1.44144415128,4.76854460997,5.01538444629,1.,0.,strkey +51,0.792730684781,1.91367048509,1.58887384677,4.71739397335,5.25690012199,1.,0.,strkey +52,0.371311881576,1.67565079528,1.81688563053,4.60353107555,5.44265822961,1.,0.,strkey +53,-0.814398070371,1.13374634126,1.80328814859,4.72264252878,5.52674761122,1.,0.,strkey +54,-0.469017949323,0.601244136627,2.29690896736,4.49859178859,5.54126153454,1.,0.,strkey +55,0.871044371426,0.407597593794,2.7499112487,4.19060637761,5.57693767301,1.,0.,strkey +56,0.523764933017,0.247705192709,3.09002071379,4.02095509006,5.80510362182,1.,0.,strkey +57,-0.881326403531,0.31513103164,3.11358205718,3.96079100808,5.81000652365,1.,0.,strkey +58,-0.357928025339,0.486163915865,3.17884556771,3.72634990659,5.85693642011,1.,0.,strkey +59,0.853038779822,1.04218094475,3.45835384454,3.36703969978,5.9585988449,1.,0.,strkey +60,0.435311516013,1.59715085283,3.63313338588,3.11276729421,5.93643818229,1.,0.,strkey +61,-1.02703719138,1.92205832542,3.47606111735,3.06247155999,6.02106646259,1.,0.,strkey +62,-0.246661325557,2.14653802542,3.29446326567,2.89936259181,5.67531541272,1.,0.,strkey +63,1.02554736569,2.25943737733,3.07031591528,2.78176218013,5.78206328989,1.,0.,strkey +64,0.337814475969,2.07589147224,2.80356226089,2.55888206331,5.7094075496,1.,0.,strkey +65,-1.12023369929,1.25333011618,2.56497288445,2.77361359194,5.50799418376,1.,0.,strkey 
+66,-0.178980246554,1.11937139901,2.51598681313,2.91438309151,5.47469577206,1.,0.,strkey +67,0.97550951531,0.60553823137,2.11657741073,2.88081098981,5.37034999502,1.,0.,strkey +68,0.136653357206,0.365828836075,1.97386033165,3.13217903204,5.07254490219,1.,0.,strkey +69,-1.05607596951,0.153152115069,1.52110743825,3.01308794192,5.08902539125,1.,0.,strkey +70,-0.13095280331,0.337113974483,1.52703079853,3.16687131599,4.86649398514,1.,0.,strkey +71,1.07081057754,0.714247566736,1.53761382634,3.45151989484,4.75892309166,1.,0.,strkey +72,0.0153410376082,1.24631231847,1.61690939161,3.85481994498,4.35683752832,1.,0.,strkey +73,-0.912801257303,1.60791309476,1.8729264524,4.03037260012,4.36072588913,1.,0.,strkey +74,-0.0894895640338,2.02535207407,1.93484909619,4.09557485132,4.35327025188,1.,0.,strkey +75,0.978646999652,2.20085086625,2.09003440427,4.27542353033,4.1805058388,1.,0.,strkey +76,-0.113312642876,2.2444100761,2.50789248839,4.4151861502,4.03267168136,1.,0.,strkey +77,-1.00215099149,1.84305628445,2.61691237246,4.45425147595,3.81203553766,1.,0.,strkey +78,-0.0183234614205,1.49573923116,2.99308471214,4.71134960112,4.0273804959,1.,0.,strkey +79,1.0823738177,1.12211589848,3.27079386925,4.94288270502,4.01851068083,1.,0.,strkey +80,0.124370187893,0.616474412808,3.4284236674,4.76942168327,3.9749536483,1.,0.,strkey +81,-0.929423379352,0.290977090976,3.34131726136,4.78590392707,4.10190661656,1.,0.,strkey +82,0.23766302648,0.155302052254,3.49779513794,4.64605656795,4.15571321107,1.,0.,strkey +83,1.03531486192,0.359702776204,3.4880725919,4.48167586667,4.21134561991,1.,0.,strkey +84,-0.261234571382,0.713877760378,3.42756426614,4.426443869,4.25208300527,1.,0.,strkey +85,-1.03572442277,1.25001113691,2.96908341113,4.25500915322,4.25723010649,1.,0.,strkey +86,0.380034261243,1.70543355622,2.73605932518,4.16703432307,4.63700400788,1.,0.,strkey +87,1.03734873488,1.97544410562,2.55586572141,3.84976673263,4.55282864289,1.,0.,strkey 
+88,-0.177344253372,2.22614526325,2.09565864891,3.77378097953,4.82577400298,1.,0.,strkey +89,-0.976821526892,2.18385079177,1.78522284118,3.67768223554,5.06302440873,1.,0.,strkey +90,0.264820472091,1.86981946157,1.50048403865,3.43619796921,5.05651761669,1.,0.,strkey +91,1.05642344868,1.47568646076,1.51347671977,3.20898518885,5.50149047462,1.,0.,strkey +92,-0.311607433358,1.04226467636,1.52089650905,3.02291865417,5.4889046232,1.,0.,strkey +93,-0.724285777937,0.553052311957,1.48573560173,2.7365973598,5.72549174225,1.,0.,strkey +94,0.519859192905,0.226520626591,1.61543723167,2.84102086852,5.69330622288,1.,0.,strkey +95,1.0323195039,0.260873217055,1.81913034804,2.83951143848,5.90325028086,1.,0.,strkey +96,-0.53285682538,0.387695521405,1.70935609313,2.57977050631,5.79579213161,1.,0.,strkey +97,-0.975127997215,0.920948771589,2.51292643636,2.71004616612,5.87016469227,1.,0.,strkey +98,0.540246804099,1.36445470181,2.61949412896,2.98482553485,6.02447664937,1.,0.,strkey +99,0.987764008058,1.85581989607,2.84685706149,2.94760204892,6.0212151724,1.,0.,strkey diff --git a/tensorflow/contrib/timeseries/examples/known_anomaly.py b/tensorflow/contrib/timeseries/examples/known_anomaly.py index 7659dd308a..c08c0b0acb 100644 --- a/tensorflow/contrib/timeseries/examples/known_anomaly.py +++ b/tensorflow/contrib/timeseries/examples/known_anomaly.py @@ -46,12 +46,12 @@ def train_and_evaluate_exogenous(csv_file_name=_DATA_FILE, train_steps=300): # Indicate the format of our exogenous feature, in this case a string # representing a boolean value. - string_feature = tf.contrib.layers.sparse_column_with_keys( - column_name="is_changepoint", keys=["no", "yes"]) + string_feature = tf.feature_column.categorical_column_with_vocabulary_list( + key="is_changepoint", vocabulary_list=["no", "yes"]) # Specify the way this feature is presented to the model, here using a one-hot # encoding. 
- one_hot_feature = tf.contrib.layers.one_hot_column( - sparse_id_column=string_feature) + one_hot_feature = tf.feature_column.indicator_column( + categorical_column=string_feature) estimator = tf.contrib.timeseries.StructuralEnsembleRegressor( periodicities=12, diff --git a/tensorflow/contrib/timeseries/examples/lstm.py b/tensorflow/contrib/timeseries/examples/lstm.py index f37cafcc50..2eee878196 100644 --- a/tensorflow/contrib/timeseries/examples/lstm.py +++ b/tensorflow/contrib/timeseries/examples/lstm.py @@ -59,10 +59,10 @@ class _LSTMModel(ts_model.SequentialTimeSeriesModel): num_units: The number of units in the model's LSTMCell. num_features: The dimensionality of the time series (features per timestep). - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects representing features which are inputs to the model but are - not predicted by it. These must then be present for training, - evaluation, and prediction. + exogenous_feature_columns: A list of `tf.feature_column`s representing + features which are inputs to the model but are not predicted by + it. These must then be present for training, evaluation, and + prediction. dtype: The floating point data type to use. """ super(_LSTMModel, self).__init__( @@ -189,12 +189,16 @@ def train_and_predict( export_directory=None): """Train and predict using a custom time series model.""" # Construct an Estimator from our LSTM model. + categorical_column = tf.feature_column.categorical_column_with_hash_bucket( + key="categorical_exogenous_feature", hash_bucket_size=16) exogenous_feature_columns = [ # Exogenous features are not part of the loss, but can inform # predictions. In this example the features have no extra information, but # are included as an API example. 
- tf.contrib.layers.real_valued_column( - "2d_exogenous_feature", dimension=2)] + tf.feature_column.numeric_column( + "2d_exogenous_feature", shape=(2,)), + tf.feature_column.embedding_column( + categorical_column=categorical_column, dimension=10)] estimator = ts_estimators.TimeSeriesRegressor( model=_LSTMModel(num_features=5, num_units=128, exogenous_feature_columns=exogenous_feature_columns), @@ -205,7 +209,11 @@ def train_and_predict( csv_file_name, column_names=((tf.contrib.timeseries.TrainEvalFeatures.TIMES,) + (tf.contrib.timeseries.TrainEvalFeatures.VALUES,) * 5 - + ("2d_exogenous_feature",) * 2)) + + ("2d_exogenous_feature",) * 2 + + ("categorical_exogenous_feature",)), + # Data types other than for `times` need to be specified if they aren't + # float32. In this case one of our exogenous features has string dtype. + column_dtypes=((tf.int64,) + (tf.float32,) * 7 + (tf.string,))) train_input_fn = tf.contrib.timeseries.RandomWindowInputFn( reader, batch_size=4, window_size=32) estimator.train(input_fn=train_input_fn, steps=training_steps) @@ -215,7 +223,9 @@ def train_and_predict( predict_exogenous_features = { "2d_exogenous_feature": numpy.concatenate( [numpy.ones([1, 100, 1]), numpy.zeros([1, 100, 1])], - axis=-1)} + axis=-1), + "categorical_exogenous_feature": numpy.array( + ["strkey"] * 100)[None, :, None]} (predictions,) = tuple(estimator.predict( input_fn=tf.contrib.timeseries.predict_continuation_input_fn( evaluation, steps=100, diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators.py b/tensorflow/contrib/timeseries/python/timeseries/estimators.py index f8355f366f..8d13343e82 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.layers.python.layers import feature_column - from 
tensorflow.contrib.timeseries.python.timeseries import ar_model from tensorflow.contrib.timeseries.python.timeseries import feature_keys from tensorflow.contrib.timeseries.python.timeseries import head as ts_head_lib @@ -31,10 +29,12 @@ from tensorflow.contrib.timeseries.python.timeseries.state_space_models.filterin from tensorflow.python.estimator import estimator_lib from tensorflow.python.estimator.export import export_lib +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.training import training as train @@ -117,22 +117,29 @@ class TimeSeriesRegressor(estimator_lib.Estimator): dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) - with ops.Graph().as_default(): - # Default placeholders have only an unknown batch dimension. Make them - # in a separate graph, then splice in the series length to the shapes - # and re-create them in the outer graph. - exogenous_feature_shapes = { - key: (value.get_shape(), value.dtype) for key, value - in feature_column.make_place_holder_tensors_for_base_features( - self._model.exogenous_feature_columns).items()} - for feature_key, (batch_only_feature_shape, value_dtype) in ( - exogenous_feature_shapes.items()): - batch_only_feature_shape = batch_only_feature_shape.with_rank_at_least( - 1).as_list() - feature_shape = ([default_batch_size, default_series_length] - + batch_only_feature_shape[1:]) - placeholders[feature_key] = array_ops.placeholder( - dtype=value_dtype, name=feature_key, shape=feature_shape) + if self._model.exogenous_feature_columns: + with ops.Graph().as_default(): + # Default placeholders have only an unknown batch dimension. 
Make them + # in a separate graph, then splice in the series length to the shapes + # and re-create them in the outer graph. + parsed_features = ( + feature_column.make_parse_example_spec( + self._model.exogenous_feature_columns)) + placeholder_features = parsing_ops.parse_example( + serialized=array_ops.placeholder( + shape=[None], dtype=dtypes.string), + features=parsed_features) + exogenous_feature_shapes = { + key: (value.get_shape(), value.dtype) for key, value + in placeholder_features.items()} + for feature_key, (batch_only_feature_shape, value_dtype) in ( + exogenous_feature_shapes.items()): + batch_only_feature_shape = ( + batch_only_feature_shape.with_rank_at_least(1).as_list()) + feature_shape = ([default_batch_size, default_series_length] + + batch_only_feature_shape[1:]) + placeholders[feature_key] = array_ops.placeholder( + dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. @@ -333,11 +340,11 @@ class StructuralEnsembleRegressor(StateSpaceRegressor): determine the model size. Learning autoregressive coefficients typically requires more steps and a smaller step size than other components. - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. + exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not part + of the series to be predicted. Passed to + `tf.feature_column.input_layer`. 
exogenous_update_condition: A function taking two Tensor arguments, `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]), and diff --git a/tensorflow/contrib/timeseries/python/timeseries/model.py b/tensorflow/contrib/timeseries/python/timeseries/model.py index bac7d1ebf5..7644764a74 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/model.py @@ -21,18 +21,17 @@ from __future__ import print_function import abc import collections -from tensorflow.contrib import layers -from tensorflow.contrib.layers import feature_column - from tensorflow.contrib.timeseries.python.timeseries import math_utils from tensorflow.contrib.timeseries.python.timeseries.feature_keys import PredictionFeatures from tensorflow.contrib.timeseries.python.timeseries.feature_keys import TrainEvalFeatures +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variable_scope @@ -66,11 +65,11 @@ class TimeSeriesModel(object): Args: num_features: Number of features for the time series - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. 
+ exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not + part of the series to be predicted. Passed to + `tf.feature_column.input_layer`. dtype: The floating point datatype to use. """ if exogenous_feature_columns: @@ -86,7 +85,7 @@ class TimeSeriesModel(object): @property def exogenous_feature_columns(self): - """`FeatureColumn` objects for features which are not predicted.""" + """`tf.feature_column`s for features which are not predicted.""" return self._exogenous_feature_columns # TODO(allenl): Move more of the generic machinery for generating and @@ -265,11 +264,14 @@ class TimeSeriesModel(object): if not self._exogenous_feature_columns: return (0,) with ops.Graph().as_default(): - placeholder_features = ( - feature_column.make_place_holder_tensors_for_base_features( + parsed_features = ( + feature_column.make_parse_example_spec( self._exogenous_feature_columns)) - embedded = layers.input_from_feature_columns( - columns_to_tensors=placeholder_features, + placeholder_features = parsing_ops.parse_example( + serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string), + features=parsed_features) + embedded = feature_column.input_layer( + features=placeholder_features, feature_columns=self._exogenous_feature_columns) return embedded.get_shape().as_list()[1:] @@ -308,13 +310,13 @@ class TimeSeriesModel(object): # Avoid shape warnings when embedding "scalar" exogenous features (those # with only batch and window dimensions); input_from_feature_columns # expects input ranks to match the embedded rank.
- if tensor.get_shape().ndims == 1: + if tensor.get_shape().ndims == 1 and tensor.dtype != dtypes.string: exogenous_features_single_batch_dimension[name] = tensor[:, None] else: exogenous_features_single_batch_dimension[name] = tensor embedded_exogenous_features_single_batch_dimension = ( - layers.input_from_feature_columns( - columns_to_tensors=exogenous_features_single_batch_dimension, + feature_column.input_layer( + features=exogenous_features_single_batch_dimension, feature_columns=self._exogenous_feature_columns, trainable=True)) exogenous_regressors = array_ops.reshape( @@ -381,8 +383,8 @@ class SequentialTimeSeriesModel(TimeSeriesModel): may use _scale_back_data or _scale_back_variance to return predictions to the input scale. dtype: The floating point datatype to use. - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects. See `TimeSeriesModel`. + exogenous_feature_columns: A list of `tf.feature_column`s objects. See + `TimeSeriesModel`. exogenous_update_condition: A function taking two Tensor arguments `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]) and returning a diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py index 6257002647..951c6546d5 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py @@ -112,11 +112,11 @@ class StateSpaceModelConfiguration( exogenous_noise_decreases: If True, exogenous regressors can "set" model state, decreasing uncertainty. If both this parameter and exogenous_noise_increases are False, exogenous regressors are ignored. 
- exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. + exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not part + of the series to be predicted. Passed to + `tf.feature_column.input_layer`. exogenous_update_condition: A function taking two Tensor arguments `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]) and returning a -- GitLab From a05488be720fc803ac56738c8bc0222fb8a36d7f Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Mon, 26 Feb 2018 14:11:08 -0800 Subject: [PATCH 025/311] Adding documentation for dataset/iterator checkpointing. PiperOrigin-RevId: 187078347 --- .../docs_src/programmers_guide/datasets.md | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index d19200e80c..d38fbddfa1 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -327,6 +327,35 @@ same op/node (created by `Iterator.get_next()`). Therefore, evaluating *any* of these tensors will advance the iterator for all components. A typical consumer of an iterator will include all components in a single expression. +### Saving iterator state + +The @{tf.contrib.data.make_saveable_from_iterator} function creates a +`SaveableObject` from an iterator, which can be used to save and +restore the current state of the iterator (and, effectively, the whole input +pipeline). 
A saveable object thus created can be added to @{tf.train.Saver} +variables list or the `tf.GraphKeys.SAVEABLE_OBJECTS` collection for saving and +restoring in the same manner as a @{tf.Variable}. Refer to +@{$saved_model$Saving and Restoring} for details on how to save and restore +variables. + +```python +# Create saveable object from iterator. +saveable = tf.contrib.data.make_saveable_from_iterator(iterator) + +# Save the iterator state by adding it to the saveable objects collection. +tf.add_to_collection(tf.GraphKeys.SAVEABLE_OBJECTS, saveable) +saver = tf.train.Saver() + +with tf.Session() as sess: + + if should_checkpoint: + saver.save(sess, path_to_checkpoint) + +# Restore the iterator state. +with tf.Session() as sess: + saver.restore(sess, path_to_checkpoint) +``` + ## Reading input data ### Consuming NumPy arrays -- GitLab From d98e7fc5720c1597b6f2034ba2ad62438ac5ef39 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 26 Feb 2018 14:19:56 -0800 Subject: [PATCH 026/311] [XLA] GTE of a certain element of the tuple does not need to keep other elements alive. This achieves two things: 1. Heap simulation runtime is no longer quadratic in the number of tuple elements (as we don't add each GetTupleElement to the liveset of each buffer defined by the tuple). 2. A reduction in the heap memory footprint.
PiperOrigin-RevId: 187079787 --- .../compiler/xla/service/heap_simulator.cc | 135 ++++++++++-------- .../xla/service/heap_simulator_test.cc | 50 +++++++ 2 files changed, 127 insertions(+), 58 deletions(-) diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc index a2d13c013c..3dd4c4a079 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.cc +++ b/tensorflow/compiler/xla/service/heap_simulator.cc @@ -27,38 +27,6 @@ namespace xla { using tensorflow::gtl::FlatMap; using tensorflow::gtl::FlatSet; -namespace { - -// Returns the set of buffers that may be sources of all operands of the given -// instruction. The returned buffers are guaranteed to have no duplicates, and -// to be sorted in a deterministic order. -std::vector UniqueOperandSourceBuffers( - const HloInstruction* instruction, - const TuplePointsToAnalysis& points_to_analysis) { - std::vector buffers; - for (const HloInstruction* operand : instruction->operands()) { - points_to_analysis.GetPointsToSet(operand).ForEachElement( - [&](const ShapeIndex& /*index*/, - const PointsToSet::BufferList& points_to) { - buffers.insert(buffers.end(), points_to.begin(), points_to.end()); - }); - } - - // Sort and then remove duplicates from buffers. 
- std::sort(buffers.begin(), buffers.end(), - [](const LogicalBuffer* a, const LogicalBuffer* b) { - return a->id() < b->id(); - }); - buffers.erase(std::unique(buffers.begin(), buffers.end(), - [](const LogicalBuffer* a, const LogicalBuffer* b) { - return a->id() == b->id(); - }), - buffers.end()); - return buffers; -} - -} // namespace - /*static*/ StatusOr HeapSimulator::Run( std::unique_ptr algorithm, const HloModule& module, @@ -93,6 +61,7 @@ Status HeapSimulator::RunComputation( const HloComputation& computation, const std::vector& instruction_sequence, const TuplePointsToAnalysis& points_to_analysis) { + VLOG(3) << "Computation:\n" << computation.ToString(); // The goal here is to minimize memory usage, assuming the given sequential // ordering of instructions. The strategy is to walk through the instruction // sequence, calling Alloc and Free on the underlying heap algorithm. The @@ -101,7 +70,51 @@ Status HeapSimulator::RunComputation( // 'live_buffers' tracks the liveness of each buffer that we assign, by // associating it with a set of HloInstructions that need to be visited. When // the set becomes empty, the buffer is no longer used, and can be freed. + // 'used_buffers' is the reverse map - it tracks which buffers were used by an + // instruction, so that we can remove the instructions from a buffer's live + // set after they are visited. FlatMap> live_buffers; + FlatMap> used_buffers; + auto add_user_to_buffer = [this, &live_buffers, &used_buffers]( + const HloInstruction* user, + const LogicalBuffer* buffer) { + if (!IgnoreBuffer(buffer)) { + VLOG(4) << " Adding user " << user->name() << " to buffer " + << buffer->ToString(); + live_buffers[buffer].insert(user); + used_buffers[user].insert(buffer); + } + }; + + // Initialize live_buffers for each buffer that we're going to assign. 
The + // set of instructions that need to be visited contains all users of all + // aliases, that is, all users of all instructions that have the buffer + // contained in their points-to set. + for (const HloInstruction* instruction : instruction_sequence) { + const PointsToSet& points_to = + points_to_analysis.GetPointsToSet(instruction); + const PointsToSet::BufferSet& buffer_set = points_to.CreateFlattenedSet(); + for (const HloInstruction* user : instruction->users()) { + if (user->opcode() != HloOpcode::kGetTupleElement) { + for (const LogicalBuffer* buffer : buffer_set) { + add_user_to_buffer(user, buffer); + } + } else { + // A GetTupleElement doesn't need to keep all of its operand's buffers + // alive. It only needs the buffers that relate to the element it's + // extracting, and the tuple it's extracting from, but not the buffers + // for the other elements. + for (const LogicalBuffer* buffer : points_to.element({})) { + add_user_to_buffer(user, buffer); + } + const PointsToSet& gte_points_to = + points_to_analysis.GetPointsToSet(user); + for (const LogicalBuffer* buffer : gte_points_to.CreateFlattenedSet()) { + add_user_to_buffer(user, buffer); + } + } + } + } const HloInstruction* root = computation.root_instruction(); auto output_source_buffers = @@ -114,34 +127,17 @@ Status HeapSimulator::RunComputation( buffers_defined_by_instruction = points_to_analysis.GetBuffersDefinedByInstruction(instruction); - // Initialize live_buffers for each buffer that we're going to assign. The - // set of instructions that need to be visited contains all users of all - // aliases. The alias itself is not necessary; if it has users, the users - // are necessarily scheduled after the alias. And if it has no users, it is - // either a dead value or an output, both of which are handled below. - // - // We ignore control dependencies here.
The reasoning is that the control - // dependencies have already been accounted for in the ordering of the given - // 'instruction_sequence', and should not otherwise artificially extend the - // lifetime of buffers that aren't already connected by a data dependency. + VLOG(3) << "Instruction: " << instruction->ToString(); + for (const LogicalBuffer* buffer : buffers_defined_by_instruction) { + VLOG(4) << " Defines: " << buffer->ToString() + << (IgnoreBuffer(buffer) ? " (Ignored)" : ""); + } + dead_buffers_to_free.clear(); for (const LogicalBuffer* buffer : buffers_defined_by_instruction) { if (IgnoreBuffer(buffer)) { continue; } - FlatSet* live_set = nullptr; - for (const BufferAlias& alias : - points_to_analysis.GetBufferAliases(*buffer)) { - const std::vector& users = - alias.instruction()->users(); - if (!users.empty()) { - if (live_set == nullptr) { - live_set = &live_buffers[buffer]; - } - live_set->insert(users.begin(), users.end()); - } - } - // Add a nullptr sentry to ensure entry parameters and output source // buffers are not freed until the very end. const bool entry_parameter = @@ -165,11 +161,12 @@ Status HeapSimulator::RunComputation( // have no instructions left to visit are moved from live_buffers to // operand_buffers_to_free. operand_buffers_to_free.clear(); - for (const LogicalBuffer* operand_buffer : - UniqueOperandSourceBuffers(instruction, points_to_analysis)) { + for (const LogicalBuffer* operand_buffer : used_buffers[instruction]) { if (IgnoreBuffer(operand_buffer)) { continue; } + VLOG(4) << " Removing user " << instruction->name() << " from buffer " + << operand_buffer->ToString(); auto it = live_buffers.find(operand_buffer); FlatSet* live_set = &it->second; live_set->erase(instruction); @@ -178,6 +175,11 @@ Status HeapSimulator::RunComputation( operand_buffers_to_free.push_back(operand_buffer); } } + // Sort to get a deterministic iteration order. 
+ std::sort(operand_buffers_to_free.begin(), operand_buffers_to_free.end(), + [](const LogicalBuffer* x, const LogicalBuffer* y) { + return x->id() < y->id(); + }); // Allocate buffers defined by this instruction. This is the latest point // that we can allocate; right before the buffer is first used. This must @@ -203,6 +205,8 @@ Status HeapSimulator::RunComputation( CanShareOperandBufferWithUser( operand_buffer->instruction(), operand_buffer->index(), buffer->instruction(), buffer->index(), points_to_analysis)) { + VLOG(3) << " Sharing: " << buffer->ToString() << " with " + << operand_buffer->ToString(); ShareBuffer(buffer, operand_buffer, instruction); shared = true; break; @@ -211,6 +215,7 @@ Status HeapSimulator::RunComputation( } if (!shared) { + VLOG(3) << " Allocating: " << buffer->ToString(); Alloc(buffer, instruction); } } @@ -244,20 +249,34 @@ Status HeapSimulator::RunComputation( // Free buffers that are no longer live. This is the earliest point that we // can de-allocate; right after the last use of the buffer. for (const LogicalBuffer* buffer : dead_buffers_to_free) { + VLOG(3) << " Freeing dead: " << buffer->ToString(); Free(buffer, instruction); } for (const LogicalBuffer* buffer : operand_buffers_to_free) { + VLOG(3) << " Freeing operand: " << buffer->ToString(); Free(buffer, instruction); } } // Any remaining live buffers must be entry parameters or output source - // buffers, which had a nullptr sentry added. Free them now. + // buffers, which had a nullptr sentry added. Free them now, in a + // deterministic order. 
+ std::vector to_free; + to_free.reserve(live_buffers.size()); for (const auto& buffer_pending : live_buffers) { const LogicalBuffer* buffer = buffer_pending.first; const FlatSet& pending = buffer_pending.second; CHECK_EQ(pending.size(), 1) << *buffer; CHECK(*pending.begin() == nullptr) << *buffer; + to_free.push_back(buffer); + } + + std::sort(to_free.begin(), to_free.end(), + [](const LogicalBuffer* x, const LogicalBuffer* y) { + return x->id() < y->id(); + }); + for (const LogicalBuffer* buffer : to_free) { + VLOG(3) << "Freeing pending: " << buffer->ToString(); Free(buffer, root); } diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc index 387b649a73..688a271712 100644 --- a/tensorflow/compiler/xla/service/heap_simulator_test.cc +++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc @@ -410,6 +410,56 @@ TEST_F(HeapSimulatorTest, MultiplyDotDotTuple) { }); } +TEST_F(HeapSimulatorTest, IndependentTupleElements) { + auto builder = HloComputation::Builder(TestName()); + auto paramA = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32scalar_, "paramA")); + auto paramB = builder.AddInstruction( + HloInstruction::CreateParameter(1, f32scalar_, "paramB")); + auto mul = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kMultiply, paramA, paramB)); + auto add = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kAdd, paramA, paramB)); + auto tuple = builder.AddInstruction(HloInstruction::CreateTuple({mul, add})); + auto element0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(f32scalar_, tuple, 0)); + auto broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(f32vec4_, element0, {0})); + auto sub = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kSubtract, paramA, paramB)); + auto element1 = builder.AddInstruction( + 
HloInstruction::CreateGetTupleElement(f32scalar_, tuple, 1)); + auto output = builder.AddInstruction( + HloInstruction::CreateTuple({broadcast, sub, element1})); + + HeapSimulatorTracker tracker(TestName(), builder.Build(), + {paramA, paramB, mul, add, tuple, element0, + broadcast, sub, element1, output}); + tracker.ExpectCallSequence({ + {kAlloc, tracker.BufferAt(paramA, {})}, + {kAlloc, tracker.BufferAt(paramB, {})}, + {kAlloc, tracker.BufferAt(mul, {})}, + {kAlloc, tracker.BufferAt(add, {})}, + {kAlloc, tracker.BufferAt(tuple, {})}, + {kAlloc, tracker.BufferAt(broadcast, {})}, + // The mul can be freed right after the broadcast happens, even though + // The other GetTupleElement is still alive. + {kFree, tracker.BufferAt(mul, {})}, + {kAlloc, tracker.BufferAt(sub, {})}, + // The temporary tuple is now dead. + {kFree, tracker.BufferAt(tuple, {})}, + {kAlloc, tracker.BufferAt(output, {})}, + // All params and outputs are freed at the end. + {kFree, tracker.BufferAt(paramA, {})}, + {kFree, tracker.BufferAt(paramB, {})}, + {kFree, tracker.BufferAt(add, {})}, + {kFree, tracker.BufferAt(broadcast, {})}, + {kFree, tracker.BufferAt(sub, {})}, + {kFree, tracker.BufferAt(output, {})}, + {kFinish, nullptr}, + }); +} + TEST_F(HeapSimulatorTest, WholeModule) { HeapSimulatorTracker tracker(TestName()); -- GitLab From 5b7f78c767b30076850f9b9f88b8730767a0437c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 14:25:30 -0800 Subject: [PATCH 027/311] 1st version of sequential feature columns. 
PiperOrigin-RevId: 187080635 --- tensorflow/contrib/feature_column/BUILD | 31 +- .../sequential_feature_column.py | 308 +++++++++++- .../sequential_feature_column_test.py | 471 ++++++++++++++++++ 3 files changed, 808 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index 6fc053759c..a53e36c2d5 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -33,5 +33,34 @@ py_library( name = "sequential_feature_column", srcs = ["python/feature_column/sequential_feature_column.py"], srcs_version = "PY2AND3", - deps = [], + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:tensor_shape", + "//tensorflow/python:variable_scope", + "//tensorflow/python/feature_column", + ], +) + +py_test( + name = "sequential_feature_column_test", + srcs = ["python/feature_column/sequential_feature_column_test.py"], + srcs_version = "PY2AND3", + tags = ["no_pip"], + deps = [ + ":sequential_feature_column", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:training", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + ], ) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py index 690a44ff43..4ed7268e7a 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py +++ 
b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py @@ -12,8 +12,314 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experimental methods for tf.feature_column sequential input.""" +"""Experimental methods for tf.feature_column sequence input.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function + + +import abc +import collections + + +from tensorflow.python.feature_column import feature_column as fc +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import variable_scope + +# TODO(b/73160931): Fix pydoc. +# pylint: disable=g-doc-args,missing-docstring,protected-access +# TODO(b/73827486): Support SequenceExample. + + +def sequence_input_layer( + features, + feature_columns, + weight_collections=None, + trainable=True, + scope=None): + """"Builds input layer for sequence input. + + All `feature_columns` must be sequence dense columns with the same + `sequence_length`. The output of this method can be fed into sequence + networks, such as RNN. + + The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could differ from + batch to batch. + + If multiple `feature_columns` are given with `Di` `num_elements` each, their + outputs are concatenated. So, the final `Tensor` has shape + `[batch_size, T, D0 + D1 + ... + Dn]`. 
+ + Example: + + ```python + rating = sequence_numeric_column('rating') + watches = sequence_categorical_column_with_identity( + 'watches', num_buckets=1000) + watches_embedding = embedding_column(watches, dimension=10) + columns = [rating, watches] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Returns: + An `(input_layer, sequence_length)` tuple where: + - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could differ + from batch to batch. `D` is the sum of `num_elements` for all + `feature_columns`. + - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence + length for each example. + Raises: + ValueError: If any of the `feature_columns` is the wrong type. + """ + feature_columns = fc._clean_feature_columns(feature_columns) + for c in feature_columns: + if not isinstance(c, _SequenceDenseColumn): + raise ValueError( + 'All feature_columns must be of type _SequenceDenseColumn. ' + 'Given (type {}): {}'.format(type(c), c)) + + with variable_scope.variable_scope( + scope, default_name='sequence_input_layer', values=features.values()): + builder = fc._LazyBuilder(features) + output_tensors = [] + sequence_lengths = [] + ordered_columns = [] + for column in sorted(feature_columns, key=lambda x: x.name): + ordered_columns.append(column) + with variable_scope.variable_scope( + None, default_name=column._var_scope_name): + dense_tensor, sequence_length = column._get_sequence_dense_tensor( + builder, + weight_collections=weight_collections, + trainable=trainable) + # Flattens the final dimension to produce a 3D Tensor. 
+ num_elements = column._variable_shape.num_elements() + shape = array_ops.shape(dense_tensor) + output_tensors.append( + array_ops.reshape( + dense_tensor, + shape=array_ops.concat([shape[:2], [num_elements]], axis=0))) + sequence_lengths.append(sequence_length) + fc._verify_static_batch_size_equality(output_tensors, ordered_columns) + # TODO(b/73160931): Verify sequence_length equality. + return array_ops.concat(output_tensors, -1), sequence_lengths[0] + + +# TODO(b/73160931): Add remaining categorical columns. +def sequence_categorical_column_with_identity( + key, num_buckets, default_value=None): + return _SequenceCategoricalColumn( + fc.categorical_column_with_identity( + key=key, + num_buckets=num_buckets, + default_value=default_value)) + + +# TODO(b/73160931): Merge with embedding_column +def _sequence_embedding_column( + categorical_column, dimension, initializer=None, ckpt_to_load_from=None, + tensor_name_in_ckpt=None, max_norm=None, trainable=True): + if not isinstance(categorical_column, _SequenceCategoricalColumn): + raise ValueError( + 'categorical_column must be of type _SequenceCategoricalColumn. ' + 'Given (type {}): {}'.format( + type(categorical_column), categorical_column)) + return _SequenceEmbeddingColumn( + fc.embedding_column( + categorical_column, + dimension=dimension, + initializer=initializer, + ckpt_to_load_from=ckpt_to_load_from, + tensor_name_in_ckpt=tensor_name_in_ckpt, + max_norm=max_norm, + trainable=trainable)) + + +def sequence_numeric_column( + key, + shape=(1,), + default_value=0., + dtype=dtypes.float32): + # TODO(b/73160931): Add validations. 
+ return _SequenceNumericColumn( + key, + shape=shape, + default_value=default_value, + dtype=dtype) + + +class _SequenceDenseColumn(fc._FeatureColumn): + """Represents dense sequence data.""" + + __metaclass__ = abc.ABCMeta + + TensorSequenceLengthPair = collections.namedtuple( # pylint: disable=invalid-name + 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) + + @abc.abstractproperty + def _variable_shape(self): + """`TensorShape` without batch and sequence dimensions.""" + pass + + @abc.abstractmethod + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + """Returns a `TensorSequenceLengthPair`.""" + pass + + +def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): + with ops.name_scope(None, 'sequence_length') as name_scope: + row_ids = sp_tensor.indices[:, 0] + column_ids = sp_tensor.indices[:, 1] + column_ids += array_ops.ones_like(column_ids) + seq_length = ( + math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) + # If the last n rows do not have ids, seq_length will have shape + # [batch_size - n]. Pad the remaining values with zeros. 
+ n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1] + padding = array_ops.zeros(n_pad, dtype=seq_length.dtype) + return array_ops.concat([seq_length, padding], axis=0, name=name_scope) + + +class _SequenceCategoricalColumn( + fc._CategoricalColumn, + collections.namedtuple( + '_SequenceCategoricalColumn', ['categorical_column'])): + + @property + def name(self): + return self.categorical_column.name + + @property + def _parse_example_spec(self): + return self.categorical_column._parse_example_spec + + def _transform_feature(self, inputs): + return self.categorical_column._transform_feature(inputs) + + @property + def _num_buckets(self): + return self.categorical_column._num_buckets + + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) + id_tensor = sparse_tensors.id_tensor + weight_tensor = sparse_tensors.weight_tensor + # Expands final dimension, so that embeddings are not combined during + # embedding lookup. + check_id_rank = check_ops.assert_equal( + array_ops.rank(id_tensor), 2, + data=[ + 'Column {} expected ID tensor of rank 2. 
'.format(self.name), + 'id_tensor shape: ', array_ops.shape(id_tensor)]) + with ops.control_dependencies([check_id_rank]): + id_tensor = sparse_ops.sparse_reshape( + id_tensor, + shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0)) + if weight_tensor is not None: + check_weight_rank = check_ops.assert_equal( + array_ops.rank(weight_tensor), 2, + data=[ + 'Column {} expected weight tensor of rank 2.'.format(self.name), + 'weight_tensor shape:', array_ops.shape(weight_tensor)]) + with ops.control_dependencies([check_weight_rank]): + weight_tensor = sparse_ops.sparse_reshape( + weight_tensor, + shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0)) + return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor) + + def _sequence_length(self, inputs): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) + return _sequence_length_from_sparse_tensor(sparse_tensors.id_tensor) + + +class _SequenceEmbeddingColumn( + _SequenceDenseColumn, + collections.namedtuple('_SequenceEmbeddingColumn', ['embedding_column'])): + + @property + def name(self): + return self.embedding_column.name + + @property + def _parse_example_spec(self): + return self.embedding_column._parse_example_spec + + def _transform_feature(self, inputs): + return self.embedding_column._transform_feature(inputs) + + @property + def _variable_shape(self): + return self.embedding_column._variable_shape + + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + dense_tensor = self.embedding_column._get_dense_tensor( + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) + sequence_length = self.embedding_column.categorical_column._sequence_length( + inputs) + return _SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + + +class _SequenceNumericColumn( + _SequenceDenseColumn, + collections.namedtuple( + '_SequenceNumericColumn', + ['key', 'shape', 
'default_value', 'dtype'])): + + @property + def name(self): + return self.key + + @property + def _parse_example_spec(self): + return {self.key: parsing_ops.VarLenFeature(self.dtype)} + + def _transform_feature(self, inputs): + return inputs.get(self.key) + + @property + def _variable_shape(self): + return tensor_shape.TensorShape(self.shape) + + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + # Do nothing with weight_collections and trainable since no variables are + # created in this function. + del weight_collections + del trainable + sp_tensor = inputs.get(self) + dense_tensor = sparse_ops.sparse_tensor_to_dense( + sp_tensor, default_value=self.default_value) + # Reshape into [batch_size, T, variable_shape]. + dense_shape = array_ops.concat( + [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape], + axis=0) + dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape) + sequence_length = _sequence_length_from_sparse_tensor( + sp_tensor, num_elements=self._variable_shape.num_elements()) + return _SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + +# pylint: enable=g-doc-args,missing-docstring,protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py new file mode 100644 index 0000000000..59674869a2 --- /dev/null +++ b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py @@ -0,0 +1,471 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for sequential_feature_column.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.feature_column.python.feature_column import sequential_feature_column as sfc +from tensorflow.python.feature_column.feature_column import _LazyBuilder +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.platform import test +from tensorflow.python.training import monitored_session + + +class SequenceInputLayerTest(test.TestCase): + + def test_embedding_column(self): + vocabulary_size = 3 + sparse_input_a = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + sparse_input_b = sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [2, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + + embedding_dimension_a = 2 + embedding_values_a = ( + (1., 2.), # id 0 + (3., 4.), # id 1 + (5., 6.) # id 2 + ) + embedding_dimension_b = 3 + embedding_values_b = ( + (11., 12., 13.), # id 0 + (14., 15., 16.), # id 1 + (17., 18., 19.) 
# id 2 + ) + def _get_initializer(embedding_dimension, embedding_values): + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + return _initializer + + expected_input_layer = [ + # example 0, ids_a [2], ids_b [1] + [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [2, 0] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]], + ] + expected_sequence_length = [1, 2] + + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column_a = sfc._sequence_embedding_column( + categorical_column_a, dimension=embedding_dimension_a, + initializer=_get_initializer(embedding_dimension_a, embedding_values_a)) + categorical_column_b = sfc.sequence_categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_b = sfc._sequence_embedding_column( + categorical_column_b, dimension=embedding_dimension_b, + initializer=_get_initializer(embedding_dimension_b, embedding_values_b)) + + input_layer, sequence_length = sfc.sequence_input_layer( + features={ + 'aaa': sparse_input_a, + 'bbb': sparse_input_b, + }, + # Test that columns are reordered alphabetically. 
+ feature_columns=[embedding_column_b, embedding_column_a]) + + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ('sequence_input_layer/aaa_embedding/embedding_weights:0', + 'sequence_input_layer/bbb_embedding/embedding_weights:0'), + tuple([v.name for v in global_vars])) + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess)) + self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess)) + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_numeric_column(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_input_layer = [ + [[0.], [1.]], + [[10.], [0.]], + ] + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa') + + input_layer, sequence_length = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_numeric_column_multi_dim(self): + """Tests sequence_input_layer for multi-dimensional numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), + (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)) + # The output of numeric_column._get_dense_tensor should be flattened. 
+ expected_input_layer = [ + [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]], + ] + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + input_layer, sequence_length = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +def _assert_sparse_tensor_value(test_case, expected, actual): + test_case.assertEqual(np.int64, np.array(actual.indices).dtype) + test_case.assertAllEqual(expected.indices, actual.indices) + + test_case.assertEqual( + np.array(expected.values).dtype, np.array(actual.values).dtype) + test_case.assertAllEqual(expected.values, actual.values) + + test_case.assertEqual(np.int64, np.array(actual.dense_shape).dtype) + test_case.assertAllEqual(expected.dense_shape, actual.dense_shape) + + +class SequenceCategoricalColumnWithIdentityTest(test.TestCase): + + def test_get_sparse_tensors(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + expected_sparse_ids = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((1, 2, 0), dtype=np.int64), + dense_shape=(2, 2, 1)) + + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + + self.assertIsNone(id_weight_pair.weight_tensor) + with monitored_session.MonitoredSession() as sess: + _assert_sparse_tensor_value( + self, + expected_sparse_ids, + id_weight_pair.id_tensor.eval(session=sess)) + + def test_get_sparse_tensors_inputs3d(self): + """Tests _get_sparse_tensors when the input is already 3D Tensor.""" + column = 
sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=(1, 2, 0), + dense_shape=(2, 2, 1)) + + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'Column aaa expected ID tensor of rank 2\.\s*' + r'id_tensor shape:\s*\[2 2 1\]'): + id_weight_pair = column._get_sparse_tensors( + _LazyBuilder({'aaa': inputs})) + with monitored_session.MonitoredSession() as sess: + id_weight_pair.id_tensor.eval(session=sess) + + def test_sequence_length(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_zeros(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((1, 0), (3, 0), (3, 1)), + values=(1, 2, 0), + dense_shape=(5, 2)) + expected_sequence_length = [0, 1, 0, 2, 0] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceEmbeddingColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)) + + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 
11.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + expected_lookups = [ + # example 0, ids [2] + [[7., 11.], [0., 0.]], + # example 1, ids [0, 1] + [[1., 2.], [3., 5.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [1] + [[3., 5.], [0., 0.]], + ] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=embedding_dimension, + initializer=_initializer) + + embedding_lookup, _ = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ('embedding_weights:0',), tuple([v.name for v in global_vars])) + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess)) + self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess)) + + def test_sequence_length(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=2) + + _, sequence_length = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some 
examples do not have ids.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [] + # example 1, ids [2] + # example 2, ids [0, 1] + # example 3, ids [] + # example 4, ids [1] + # example 5, ids [] + indices=((1, 0), (2, 0), (2, 1), (4, 0)), + values=(2, 0, 1, 1), + dense_shape=(6, 2)) + expected_sequence_length = [0, 1, 2, 0, 1, 0] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=2) + + _, sequence_length = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceNumericColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_dense_tensor = [ + [[0.], [1.]], + [[10.], [0.]], + ] + numeric_column = sfc.sequence_numeric_column('aaa') + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_get_sequence_dense_tensor_with_shape(self): + """Tests get_sequence_dense_tensor with shape !=(1,).""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2.], [3., 4., 5.]] + # example 1, [[10., 11., 12.]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), + (1, 0), (1, 1), (1, 2)), + values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), + dense_shape=(2, 6)) + expected_dense_tensor = [ + [[0., 1., 2.], [3., 4., 5.]], + [[10., 11., 12.], [0., 0., 0.]], + ] + numeric_column 
= sfc.sequence_numeric_column('aaa', shape=(3,)) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_get_dense_tensor_multi_dim(self): + """Tests get_sequence_dense_tensor for multi-dim numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), + (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)) + expected_dense_tensor = [ + [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], + [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]], + ] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_sequence_length(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2.], [3., 4., 5.]] + # example 1, [[10., 11., 12.]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), + (1, 0), (1, 1), (1, 2)), + values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), + dense_shape=(2, 6)) + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_shape(self): + """Tests _sequence_length with shape !=(1,).""" + sparse_input = 
sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa') + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some examples do not have ids.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [] + # example 1, values [[0.], [1.]] + # example 2, [[2.]] + # example 3, values [] + # example 4, [[3.]] + # example 5, values [] + indices=((1, 0), (1, 1), (2, 0), (4, 0)), + values=(0., 1., 2., 3.), + dense_shape=(6, 2)) + expected_sequence_length = [0, 2, 1, 0, 1, 0] + numeric_column = sfc.sequence_numeric_column('aaa') + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +if __name__ == '__main__': + test.main() -- GitLab From ecace69b5e28f508f76264e66778935e84c37715 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 14:25:37 -0800 Subject: [PATCH 028/311] Add a function that allows to dynamically verify whether a function is white listed for graph mode. 
PiperOrigin-RevId: 187080654 --- tensorflow/contrib/py2tf/impl/conversion.py | 18 ++++++++++++++++++ .../contrib/py2tf/impl/conversion_test.py | 11 +++++++++++ 2 files changed, 29 insertions(+) diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index 044de33568..d95469ea53 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -97,6 +97,24 @@ class ConversionMap(object): self.dependency_cache[original_entity] = converted_ast +def is_whitelisted_for_graph(o): + """Check whether an entity is whitelisted for use in graph mode. + + Examples of whitelisted entities include all members of the tensorflow + package. + + Args: + o: A Python entity. + Returns: + Boolean + """ + m = tf_inspect.getmodule(o) + for prefix, in config.DEFAULT_UNCOMPILED_MODULES: + if m.__name__.startswith(prefix): + return True + return False + + def entity_to_graph(o, conversion_map, arg_values, arg_types): """Compile a Python entity into equivalent TensorFlow. 
diff --git a/tensorflow/contrib/py2tf/impl/conversion_test.py b/tensorflow/contrib/py2tf/impl/conversion_test.py index 7816f95857..9ff256aace 100644 --- a/tensorflow/contrib/py2tf/impl/conversion_test.py +++ b/tensorflow/contrib/py2tf/impl/conversion_test.py @@ -20,12 +20,23 @@ from __future__ import print_function import gast +from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl import conversion +from tensorflow.python.framework import constant_op from tensorflow.python.platform import test class ConversionTest(test.TestCase): + def test_is_whitelisted_for_graph(self): + + def test_fn(): + return constant_op.constant(1) + + self.assertFalse(conversion.is_whitelisted_for_graph(test_fn)) + self.assertTrue(conversion.is_whitelisted_for_graph(utils)) + self.assertTrue(conversion.is_whitelisted_for_graph(constant_op.constant)) + def test_entity_to_graph_unsupported_types(self): with self.assertRaises(ValueError): conversion_map = conversion.ConversionMap(True, (), (), None) -- GitLab From 26a765f95acc7cbc762b8e1fef94921cab8f181d Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 14:31:29 -0800 Subject: [PATCH 029/311] [TF:XLA] Bump open source llvm revision to r326083 PiperOrigin-RevId: 187081592 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index d6ac7be8b5..5b09c5e67d 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", + 
"https://github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", ], - sha256 = "f5721d9cc18a9109c9e9f847f48e69b710b961cee83e6691227e310cb3b5da58", - strip_prefix = "llvm-fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14", + sha256 = "63d4da54dc7bc9a79e2ad266d230f4f759520cccb344a2dd49c2c6383ab75285", + strip_prefix = "llvm-8f7bcdf3c65b9a47e35653d525135beb18f3ac25", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From a80896d3b3a2358f324dc4cd429409ea9acc8a09 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 14:32:08 -0800 Subject: [PATCH 030/311] Track DebugOptions in AotCompilationOptions In particular, I need this for supporting HLO profiling in the AOT backend. PiperOrigin-RevId: 187081674 --- tensorflow/compiler/xla/service/compile_only_service.cc | 3 +-- tensorflow/compiler/xla/service/compiler.cc | 3 +++ tensorflow/compiler/xla/service/compiler.h | 6 +++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index dab73596e1..6664496ab6 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -72,8 +72,7 @@ CompileOnlyService::CompileAheadOfTime( VersionedComputationHandle versioned_handle = user_computation->GetVersionedHandle(); - // TODO(b/63773457): Track DebugOptions in AotCompilationOptions. - DebugOptions debug_options = legacy_flags::GetDebugOptionsFromFlags(); + const DebugOptions& debug_options = options.debug_options(); // Dump computation proto state if flag is set. 
const string& directory_path = debug_options.xla_dump_computations_to(); diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc index e2e9d2a0c0..0392d4af48 100644 --- a/tensorflow/compiler/xla/service/compiler.cc +++ b/tensorflow/compiler/xla/service/compiler.cc @@ -86,4 +86,7 @@ Compiler::GetPlatformCompilers() { return compilers->at(platform->id()).get(); } +AotCompilationOptions::AotCompilationOptions() + : debug_options_(legacy_flags::GetDebugOptionsFromFlags()) {} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 74fd24edf8..33e19efc72 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -79,11 +79,15 @@ class AotCompilationOptions { device_allocator_ = device_allocator; } + const DebugOptions& debug_options() const { return debug_options_; } + DebugOptions* mutable_debug_options() { return &debug_options_; } + protected: - AotCompilationOptions() = default; + AotCompilationOptions(); private: DeviceMemoryAllocator* device_allocator_ = nullptr; + DebugOptions debug_options_; }; // Abstract compiler interface that is subclassed for compilation on a -- GitLab From 153e10a037c5e348834108ff46d9dccdf0cfb9a9 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 14:38:31 -0800 Subject: [PATCH 031/311] Enable de/serialization of nested control flow. This is a follow-up to the previous commit (https://github.com/tensorflow/tensorflow/commit/23851760b7b099214bdd4f1b88156d7ac2bdd2a2). It adds the new proto schemas, enables the behavior for reading and writing the new protos, and adds a test for de/serializing nested while loops. There's still a bug preventing deserializing conds, which will be addressed in another change. 
PiperOrigin-RevId: 187082713 --- tensorflow/core/protobuf/control_flow.proto | 17 ++++++- tensorflow/python/ops/control_flow_ops.py | 54 ++++++-------------- tensorflow/python/training/saver_test.py | 56 +++++++++++++++++++++ 3 files changed, 88 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/protobuf/control_flow.proto b/tensorflow/core/protobuf/control_flow.proto index 2c9476a08a..3c05b4f0e2 100644 --- a/tensorflow/core/protobuf/control_flow.proto +++ b/tensorflow/core/protobuf/control_flow.proto @@ -17,6 +17,15 @@ message ValuesDef { map external_values = 2; } +// Container for any kind of control flow context. Any other control flow +// contexts that are added below should also be added here. +message ControlFlowContextDef { + oneof ctxt { + CondContextDef cond_ctxt = 1; + WhileContextDef while_ctxt = 2; + } +} + // Protocol buffer representing a CondContext object. message CondContextDef { // Name of the context. @@ -33,6 +42,9 @@ message CondContextDef { // Values and external values in control flow context. ValuesDef values_def = 5; + + // Contexts contained inside this context (e.g. nested conds). + repeated ControlFlowContextDef nested_contexts = 6; } // Protocol buffer representing a WhileContext object. @@ -70,5 +82,8 @@ message WhileContextDef { // Optional name of the maximum_iterations tensor. string maximum_iterations_name = 11; - // Next available id: 12. + // Contexts contained inside this context (e.g. nested whiles). + repeated ControlFlowContextDef nested_contexts = 12; + + // Next available id: 13. 
} diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 8d5ab72670..85944efbe8 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1767,13 +1767,9 @@ class CondContext(ControlFlowContext): context_def.branch = self._branch context_def.values_def.MergeFrom(super(CondContext, self)._to_values_def( export_scope)) - # TODO(b/72868227): enable this once the corresponding control_flow.proto - # changes have been checked in (they aren't checked in and this is - # disabled for now to ensure forwards compatibility). - if False: # pylint: disable=using-constant-test - for nested in self._nested_contexts: - nested_def = context_def.nested_contexts.add() - nested.to_control_flow_context_def(nested_def) + for nested in self._nested_contexts: + nested_def = context_def.nested_contexts.add() + nested.to_control_flow_context_def(nested_def) return context_def else: @@ -1785,14 +1781,10 @@ class CondContext(ControlFlowContext): ret = CondContext(context_def=context_def, import_scope=import_scope) - # TODO(b/72868227): remove "if hasattr(...)" once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is here for now to ensure forwards compatibility). - if hasattr(context_def, "nested_contexts"): - ret.Enter() - for nested_def in context_def.nested_contexts: - from_control_flow_context_def(nested_def) - ret.Exit() + ret.Enter() + for nested_def in context_def.nested_contexts: + from_control_flow_context_def(nested_def) + ret.Exit() return ret def to_control_flow_context_def(self, context_def, export_scope=None): @@ -2110,10 +2102,7 @@ def cond(pred, # Only add non-nested conds to the collection. Any nested control flow will # be encapsulated in the root context. assert context_t.outer_context == context_f.outer_context - # TODO(b/72868227): remove "if True..." 
once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if True or context_t.outer_context is None: + if context_t.outer_context is None: ops.add_to_collection(ops.GraphKeys.COND_CONTEXT, context_t) ops.add_to_collection(ops.GraphKeys.COND_CONTEXT, context_f) @@ -2336,13 +2325,9 @@ class WhileContext(ControlFlowContext): context_def.values_def.MergeFrom( super(WhileContext, self)._to_values_def( export_scope=export_scope)) - # TODO(b/72868227): remove "if True..." once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if False: # pylint: disable=using-constant-test - for nested in self._nested_contexts: - nested_def = context_def.nested_contexts.add() - nested.to_control_flow_context_def(nested_def) + for nested in self._nested_contexts: + nested_def = context_def.nested_contexts.add() + nested.to_control_flow_context_def(nested_def) return context_def else: @@ -2364,14 +2349,10 @@ class WhileContext(ControlFlowContext): """ ret = WhileContext(context_def=context_def, import_scope=import_scope) - # TODO(b/72868227): remove "if hasattr(...)" once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if hasattr(context_def, "nested_contexts"): - ret.Enter() - for nested_def in context_def.nested_contexts: - from_control_flow_context_def(nested_def, import_scope=import_scope) - ret.Exit() + ret.Enter() + for nested_def in context_def.nested_contexts: + from_control_flow_context_def(nested_def, import_scope=import_scope) + ret.Exit() return ret def GetWhileContext(self): @@ -3216,10 +3197,7 @@ def while_loop(cond, swap_memory=swap_memory) # Only add non-nested loops to the collection. 
Any nested control flow will # be encapsulated in the root context. - # TODO(b/72868227): enable condition once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if True or loop_context.outer_context is None: + if loop_context.outer_context is None: ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context) result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants) if maximum_iterations is not None: diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index f00f98db00..b366ed30f3 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -53,6 +53,7 @@ from tensorflow.python.lib.io import file_io from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import partitioned_variables @@ -2040,6 +2041,61 @@ class MetaGraphTest(test.TestCase): self._testGraphExtensionRestore(test_dir) self._testRestoreFromTrainGraphWithControlContext(test_dir) + def testNestedWhileLoops(self): + test_dir = self._get_test_dir("nested_whiles") + filename = os.path.join(test_dir, "metafile") + saver_ckpt = os.path.join(test_dir, "saver.ckpt") + + # Create two simple nested while loops. 
+ with ops_lib.Graph().as_default(): + def body(i, x): + _, r = control_flow_ops.while_loop(lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0]) + return i + 1, x + r + + var = variables.Variable(0) + var_name = var.name + + _, output = control_flow_ops.while_loop(lambda i, x: i < 5, body, + [0, var]) + output_name = output.name + + init_op = variables.global_variables_initializer() + + # Generate a MetaGraphDef containing the nested loops. + with session.Session() as sess: + sess.run(init_op) + sess.run(output) + saver = saver_module.Saver() + saver.save(sess, saver_ckpt) + saver.export_meta_graph(filename) + + # Build and run the gradients of the nested while loop. We use this below + # to verify that the gradients are correct with an imported MetaGraphDef. + grad = gradients_impl.gradients([output], [var]) + with session.Session() as sess: + sess.run(init_op) + expected_grad_value = sess.run(grad) + + # Restore the MetaGraphDef into a new Graph. + with ops_lib.Graph().as_default(): + with session.Session() as sess: + saver = saver_module.import_meta_graph(filename) + saver.restore(sess, saver_ckpt) + + # Make sure we can still build gradients and get the same result. + var = ops_lib.get_default_graph().get_tensor_by_name(var_name) + output = ops_lib.get_default_graph().get_tensor_by_name(output_name) + grad = gradients_impl.gradients([output], [var]) + + init_op = variables.global_variables_initializer() + + with session.Session() as sess: + sess.run(init_op) + actual_grad_value = sess.run(grad) + self.assertEqual(expected_grad_value, actual_grad_value) + def testStrippedOpListDef(self): with self.test_session(): # Creates a graph. -- GitLab From 95d36c770b24a343008d32eda85e8f91278f6df0 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 15:37:27 -0800 Subject: [PATCH 032/311] [XLA::Interpreter] Add support for kCall to HloEvaluator. Also enable xla/tests/call_test to run on interpreter. 
PiperOrigin-RevId: 187092587 --- .../compiler/xla/service/hlo_evaluator.cc | 20 +++++++++++++++++++ .../compiler/xla/service/hlo_evaluator.h | 2 ++ tensorflow/compiler/xla/tests/BUILD | 3 +++ 3 files changed, 25 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 15ae53128a..fd06b19144 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -2445,6 +2445,26 @@ Status HloEvaluator::HandleCopy(HloInstruction* copy) { return Status::OK(); } +Status HloEvaluator::HandleCall(HloInstruction* call) { + auto* computation = call->to_apply(); + auto operands = call->operands(); + + std::vector arg_literals; + arg_literals.reserve(operands.size()); + for (auto operand : operands) { + const Literal& arg_literal = GetEvaluatedLiteralFor(operand); + arg_literals.push_back(&arg_literal); + } + + HloEvaluator embedded_evaluator; + std::unique_ptr result = + embedded_evaluator.Evaluate(*computation, arg_literals) + .ConsumeValueOrDie(); + + evaluated_[call] = std::move(result); + return Status::OK(); +} + Status HloEvaluator::Preprocess(HloInstruction* hlo) { VLOG(2) << "About to visit HLO: " << hlo->ToString(); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 3b2b697e49..c65d9915e3 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -153,6 +153,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCopy(HloInstruction* copy) override; + Status HandleCall(HloInstruction* call) override; + private: // Returns the already-evaluated literal result for the instruction. 
// A Constant instruction is considered evaluated and its literal will be diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 97abf217d7..33fde9737d 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1143,6 +1143,9 @@ xla_test( xla_test( name = "call_test", srcs = ["call_test.cc"], + tags = [ + "enable_for_xla_interpreter", + ], deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", -- GitLab From aa2f0b68fb7052ea46547bf15fb8a46f6447f182 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 15:37:40 -0800 Subject: [PATCH 033/311] Uses a thread pool for graph functions in eager mode with inter_op_parallelism_threads. PiperOrigin-RevId: 187092622 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/c_api.cc | 4 ++-- tensorflow/c/eager/c_api_internal.h | 14 +++++++++++++- tensorflow/c/eager/runtime.cc | 14 ++++++++++---- tensorflow/c/eager/runtime.h | 3 +++ tensorflow/c/eager/runtime_test.cc | 12 ++++++------ 6 files changed, 35 insertions(+), 13 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index e55cb672e9..16a2a15072 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -21,6 +21,7 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ + "//tensorflow/core:lib", "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index bebb63c746..b233dd5b93 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -818,8 +818,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // See WARNING comment below - would be nice to rework to avoid this // subtlety. 
tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = - tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); + status->status = tensorflow::KernelAndDevice::Init( + ndef, ctx->func_lib(device), &ctx->runner, kernel); if (!status->status.ok()) { delete kernel; return; diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 3356054cd0..29944df4c2 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/platform/mutex.h" @@ -45,7 +46,15 @@ struct TFE_ContextOptions { struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) - : policy(opts.policy), + : thread_pool(new tensorflow::thread::ThreadPool( + opts.session_options.options.env, "EagerCompute", + opts.session_options.options.config + .inter_op_parallelism_threads() != 0 + ? 
opts.session_options.options.config + .inter_op_parallelism_threads() + : tensorflow::port::NumSchedulableCPUs())), + runner([this](std::function f) { thread_pool->Schedule(f); }), + policy(opts.policy), session(s), rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( @@ -54,6 +63,9 @@ struct TFE_Context { log_device_placement( opts.session_options.options.config.log_device_placement()) {} + const std::unique_ptr thread_pool; + std::function)> runner; + const TFE_ContextDevicePlacementPolicy policy; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index 4bf24fec2c..b9618420f0 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -255,17 +255,22 @@ Status KernelAndDevice::InitOp(Device* device, const NodeDef& ndef, out->device_ = device; out->kernel_.reset(k); out->flib_ = nullptr; + out->runner_ = nullptr; + out->default_runner_ = [](std::function f) { f(); }; return s; } // static Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, + std::function)>* runner, KernelAndDevice* out) { OpKernel* k = nullptr; Status s = flib->CreateKernel(ndef, &k); out->device_ = flib->device(); out->kernel_.reset(k); out->flib_ = flib; + out->runner_ = runner; + out->default_runner_ = [](std::function f) { f(); }; return s; } @@ -296,10 +301,11 @@ Status KernelAndDevice::Run(std::vector* input_tensors, if (stats != nullptr) { params.track_allocations = true; } - // TODO(apassos): use a thread pool. 
- std::function)> runner = - [](std::function f) { f(); }; - params.runner = &runner; + if (runner_ == nullptr) { + params.runner = &default_runner_; + } else { + params.runner = runner_; + } OpKernelContext context(¶ms); device_->Compute(kernel_.get(), &context); diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 7fede4dae9..fa5f839977 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -169,6 +169,7 @@ class KernelAndDevice { // the FunctionLibraryRuntime is pushed on to the caller (see locking in // c_api.cc). static Status Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, + std::function)>* runner, KernelAndDevice* out); // TODO(ashankar): Remove this static Status InitOp(Device* device, const NodeDef& ndef, @@ -188,6 +189,8 @@ class KernelAndDevice { private: std::unique_ptr kernel_; Device* device_; + std::function)>* runner_; + std::function)> default_runner_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc index 643153058c..ab0b535e1a 100644 --- a/tensorflow/c/eager/runtime_test.cc +++ b/tensorflow/c/eager/runtime_test.cc @@ -92,8 +92,8 @@ TEST(KernelAndDevice, Run) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - Status s = - KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel); + Status s = KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &kernel); ASSERT_TRUE(s.ok()) << s; std::vector outputs; s = kernel.Run(&inputs, &outputs, nullptr); @@ -158,8 +158,8 @@ void BM_KernelAndDeviceInit(int iters) { KernelAndDevice k(nullptr); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { - TF_CHECK_OK( - KernelAndDevice::Init(ndef, env.function_library_runtime(), &k)); + TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &k)); } } 
BENCHMARK(BM_KernelAndDeviceInit); @@ -179,8 +179,8 @@ void BM_KernelAndDeviceRun(int iters) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - TF_CHECK_OK( - KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel)); + TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &kernel)); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr)); -- GitLab From 175730d3791618a496a5c66d7d6fef9c7768cf34 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Mon, 26 Feb 2018 15:42:52 -0800 Subject: [PATCH 034/311] [XLA] Fix #17090 a problem in IrArray::Index::SourceIndexOfTranspose. Agebraic simplification transforms bitcast-equivalent transpose/reshape instructions to bitcast instructions before IR emission. As such, we should skip the checking on whether a transpose/reshape instruction is bitcast-equivalent or not during IR emission. Remove the call from IrArray::Index::SourceIndexOfTranspose to ShapeUtil::TransposeIsBitcast. Also remove the call from IrArray::Index::SourceIndexOfReshape to ShapeUtil::ReshapeIsBitcast. Remove the calls to ShapeUtil::TransposeIsBitcast and ShapeUtil::ReshapeIsBitcast from NotWorthHoistingIndividually because layout assignment hasn't been done there yet. Instead, returns true when the input is a transpose or reshape instruction, to prevent it from being hoisted out of loops. Add a check to ShapeUtil::TransposeIsBitcast and ShapeUtil::ReshapeIsBitcast to make sure that both input shape and output shape have layouts. Add two test cases. 
PiperOrigin-RevId: 187093399 --- .../xla/service/layout_assignment_test.cc | 79 +++++++++++++++++++ .../compiler/xla/service/llvm_ir/ir_array.cc | 8 +- .../while_loop_invariant_code_motion.cc | 12 +-- tensorflow/compiler/xla/shape_util.cc | 14 +--- tensorflow/compiler/xla/shape_util.h | 4 + 5 files changed, 95 insertions(+), 22 deletions(-) diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 88e5caaf47..62feb7c1e9 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -590,6 +590,85 @@ TEST_F(LayoutAssignmentTest, TransposeToBitcastToUser) { transpose->shape(), {2, 3, 0, 1})); } +// TransposeIsBitcast shouldn't be called without layout information. +TEST_F(LayoutAssignmentTest, TransposeIsBitcastFail) { + auto builder = HloComputation::Builder(TestName()); + Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2}); + Shape input_shape_with_layout(input_shape); + *input_shape_with_layout.mutable_layout() = LayoutUtil::MakeLayout({2, 1, 0}); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_shape_with_layout, "param")); + auto hlo = builder.AddInstruction( + HloInstruction::CreateTranspose(input_shape, param, {0, 2, 1})); + // Clear the default layout assigned to the instruction. + LayoutUtil::ClearLayout(hlo->mutable_shape()); + EXPECT_DEATH(ShapeUtil::TransposeIsBitcast(hlo->operand(0)->shape(), + hlo->shape(), hlo->dimensions()), + "LayoutUtil::HasLayout"); +} + +// ReshapeIsBitcast shouldn't be called without layout information. 
+TEST_F(LayoutAssignmentTest, ReshapeIsBitcastFail) { + auto builder = HloComputation::Builder(TestName()); + Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2}); + Shape input_shape_with_layout(input_shape); + *input_shape_with_layout.mutable_layout() = LayoutUtil::MakeLayout({2, 1, 0}); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_shape_with_layout, "param")); + auto hlo = + builder.AddInstruction(HloInstruction::CreateReshape(input_shape, param)); + // Clear the default layout assigned to the instruction. + LayoutUtil::ClearLayout(hlo->mutable_shape()); + EXPECT_DEATH( + ShapeUtil::ReshapeIsBitcast(hlo->operand(0)->shape(), hlo->shape()), + "LayoutUtil::HasLayout"); +} + +// Check that the computation below doesn't crash the compiler. +// +// Within a fusion computation, only the parameters and result get assigned a +// layout. When we run the algebraic simplifier on this computation post layout +// assignment, it should not call TransposeIsBitcast on the `transpose` node +// inside the fusion computation as TransposeIsBitcast checks both input_shape +// and output_shape have layouts. 
+TEST_F(LayoutAssignmentTest, TransposeWithinFusionDoesNotCrash) { + const char* module_str = R"( + HloModule test_module + + fused_computation { + param_1 = f32[2,2,2]{2,1,0} parameter(1) + transpose = f32[2,2,2]{2,1,0} transpose(param_1), dimensions={0,2,1} + reduce_1 = f32[] parameter(0) + broadcast_1 = f32[2,2,2]{2,1,0} broadcast(reduce_1), dimensions={} + ROOT divide_1 = f32[2,2,2]{2,1,0} divide(transpose, broadcast_1) + } + + ENTRY entry_computation { + fusion.1 = f32[2,2,2]{2,1,0} parameter(1) + reduce.1 = f32[] parameter(0) + fusion.2 = f32[2,2,2]{2,1,0} fusion(reduce.1, fusion.1), kind=kLoop, calls=fused_computation + ROOT tuple.1 = (f32[2,2,2]{2,1,0}) tuple(fusion.2) + } + )"; + + auto module = tools::Parse(module_str).ValueOrDie(); + + module = + backend() + .compiler() + ->RunHloPasses(std::move(module), backend().default_stream_executor(), + /*device_allocator=*/nullptr) + .ConsumeValueOrDie(); + + EXPECT_EQ( + ::tensorflow::Status::OK(), + backend() + .compiler() + ->RunBackend(std::move(module), backend().default_stream_executor(), + /*device_allocator=*/nullptr) + .status()); +} + // A GTE inside of a fusion node inherits the layout of its operand (which // should, if we keep following operands, eventually be a parameter). 
TEST_F(LayoutAssignmentTest, GTEInheritsLayoutFromOperand) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index 6384c7f46f..f3642cf0a1 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -160,7 +160,8 @@ IrArray::Index IrArray::Index::SourceIndexOfReshape( } } - if (linear() != nullptr && + if (linear() != nullptr && LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape) && ShapeUtil::ReshapeIsBitcast(input_shape, output_shape)) { return Index(source_multidim_index, linear(), input_shape); } @@ -195,10 +196,13 @@ IrArray::Index IrArray::Index::SourceIndexOfTranspose( llvm::IRBuilder<>* builder) const { std::vector operand_multidim_index = Permute(dimension_mapping, multidim()); - if (linear() != nullptr && + + if (linear() != nullptr && LayoutUtil::HasLayout(operand_shape) && + LayoutUtil::HasLayout(shape) && ShapeUtil::TransposeIsBitcast(operand_shape, shape, dimension_mapping)) { return Index(operand_multidim_index, linear(), operand_shape); } + return Index(operand_multidim_index); } diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index a5f9b01f01..3ef0cdff67 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -106,20 +106,12 @@ static bool NotWorthHoistingIndividually(const HloInstruction& instruction) { case HloOpcode::kBitcast: case HloOpcode::kBroadcast: case HloOpcode::kConstant: + case HloOpcode::kReshape: case HloOpcode::kReverse: case HloOpcode::kSlice: + case HloOpcode::kTranspose: case HloOpcode::kTuple: return true; - - case HloOpcode::kTranspose: - return ShapeUtil::TransposeIsBitcast( - /*input_shape=*/instruction.operand(0)->shape(), - /*output_shape=*/instruction.shape(), 
instruction.dimensions()); - - case HloOpcode::kReshape: - return ShapeUtil::ReshapeIsBitcast( - /*input_shape=*/instruction.operand(0)->shape(), - /*output_shape=*/instruction.shape()); } } diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 604e0173e7..3152789016 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -1073,11 +1073,8 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ bool ShapeUtil::TransposeIsBitcast( const Shape& input_shape, const Shape& output_shape, tensorflow::gtl::ArraySlice dimension_mapping) { - // Can't insert bitcasts without layout information. - if (!LayoutUtil::HasLayout(input_shape) && - !LayoutUtil::HasLayout(output_shape)) { - return false; - } + CHECK(LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape)); // Padding is not handled. if (LayoutUtil::IsPadded(input_shape) && LayoutUtil::IsPadded(output_shape)) { @@ -1106,11 +1103,8 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ bool ShapeUtil::ReshapeIsBitcast(const Shape& input_shape, const Shape& output_shape) { - // Can't convert reshapes into bitcasts without layout information. - if (!LayoutUtil::HasLayout(input_shape) || - !LayoutUtil::HasLayout(output_shape)) { - return false; - } + CHECK(LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape)); // Padding is not handled. 
if (LayoutUtil::IsPadded(input_shape) || LayoutUtil::IsPadded(output_shape)) { diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 19b1aa93bd..8ee263fe5e 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -522,12 +522,16 @@ class ShapeUtil { // Returns whether a transpose from input_shape to output_shape with dimension // mapping "dimension_mapping" produces a result which is bit-wise identical // to its input and thus may be replaced with a bitcast. + // + // Precondition: Both input_shape and output_shape have explicit layouts. static bool TransposeIsBitcast( const Shape& input_shape, const Shape& output_shape, tensorflow::gtl::ArraySlice dimension_mapping); // Returns whether a reshape from "input_shape" to "output_shape" is a // bitcast. + // + // Precondition: Both input_shape and output_shape have explicit layouts. static bool ReshapeIsBitcast(const Shape& input_shape, const Shape& output_shape); -- GitLab From 7c512d5461eeff635acf1c7d0f301f5bb880b6b3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 16:01:04 -0800 Subject: [PATCH 035/311] [XLA] Add more supported dtypes to the local Python client. PiperOrigin-RevId: 187096144 --- tensorflow/compiler/xla/python/xla_client.py | 38 ++++++++++++-------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 3b8ec851d5..90cda42f32 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -30,9 +30,9 @@ from tensorflow.compiler.xla import xla_data_pb2 from tensorflow.compiler.xla.python import pywrap_xla as c_api -# Most functions are snake_case for consistency with other modules, -# whereas method names of ComputationBuilder and LocalComputation are -# CamelCase for consistency with XLA. 
+# Most functions are snake_case for consistency with other modules, whereas +# method names of ComputationBuilder and LocalComputation are CamelCase for +# consistency with XLA. # pylint: disable=invalid-name @@ -123,24 +123,34 @@ _BINARY_OPS = [ 'Pow', ] + XLA_ELEMENT_TYPE_TO_DTYPE = { - xla_data_pb2.F32: np.dtype(np.float32), - xla_data_pb2.F64: np.dtype(np.float64), - xla_data_pb2.S32: np.dtype(np.int32), - xla_data_pb2.S64: np.dtype(np.int64), - xla_data_pb2.U32: np.dtype(np.uint32), - xla_data_pb2.U64: np.dtype(np.uint64), - xla_data_pb2.PRED: np.dtype(np.bool), + xla_data_pb2.PRED: np.dtype('bool'), + xla_data_pb2.S8: np.dtype('int8'), + xla_data_pb2.S16: np.dtype('int16'), + xla_data_pb2.S32: np.dtype('int32'), + xla_data_pb2.S64: np.dtype('int64'), + xla_data_pb2.U8: np.dtype('uint8'), + xla_data_pb2.U16: np.dtype('uint16'), + xla_data_pb2.U32: np.dtype('uint32'), + xla_data_pb2.U64: np.dtype('uint64'), + xla_data_pb2.F16: np.dtype('float16'), + xla_data_pb2.F32: np.dtype('float32'), + xla_data_pb2.F64: np.dtype('float64'), + xla_data_pb2.C64: np.dtype('complex64'), xla_data_pb2.TUPLE: np.dtype(np.object), } # Note the conversion on the key. Numpy has a known issue wherein dtype hashing # doesn't work as expected (https://github.com/numpy/numpy/issues/7242). Thus, # when keying by dtype in this dict, we use the string form of dtypes. -DTYPE_TO_XLA_ELEMENT_TYPE = { - str(v): k - for k, v in XLA_ELEMENT_TYPE_TO_DTYPE.items() -} +DTYPE_TO_XLA_ELEMENT_TYPE = {str(dt): et + for et, dt in XLA_ELEMENT_TYPE_TO_DTYPE.items()} + + +def dtype_to_etype(dtype): + """Convenience function for reading DTYPE_TO_XLA_ELEMENT_TYPE.""" + return DTYPE_TO_XLA_ELEMENT_TYPE[str(np.dtype(dtype))] class LocalBuffer(object): -- GitLab From 511cf67f2327e9186124a92c9469dc60fd64a6a2 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Mon, 26 Feb 2018 16:23:46 -0800 Subject: [PATCH 036/311] Deprecate tf.contrib.learn. RELNOTES: Deprecated tf.contrib.learn. 
Please check contrib/learn/README.md for instructions on how to convert existing code. PiperOrigin-RevId: 187099439 --- .../python/framework/experimental_test.py | 1 - tensorflow/contrib/learn/README.md | 143 ++++++++++++++++++ tensorflow/contrib/learn/__init__.py | 7 +- tensorflow/contrib/learn/python/__init__.py | 7 +- .../contrib/learn/python/learn/__init__.py | 7 +- .../python/learn/basic_session_run_hooks.py | 43 +++++- .../learn/python/learn/datasets/__init__.py | 12 +- .../learn/python/learn/datasets/base.py | 26 +++- .../learn/python/learn/datasets/mnist.py | 23 ++- .../learn/datasets/produce_small_datasets.py | 7 +- .../learn/python/learn/datasets/synthetic.py | 10 +- .../python/learn/datasets/text_datasets.py | 10 +- .../learn/python/learn/estimators/__init__.py | 7 +- .../learn/python/learn/estimators/_sklearn.py | 4 +- .../learn/estimators/composable_model.py | 17 ++- .../python/learn/estimators/constants.py | 8 +- .../learn/python/learn/estimators/debug.py | 14 +- .../learn/python/learn/estimators/dnn.py | 19 ++- .../learn/estimators/dnn_linear_combined.py | 19 ++- .../learn/estimators/dynamic_rnn_estimator.py | 13 +- .../python/learn/estimators/estimator.py | 27 +++- .../learn/estimators/estimator_test_utils.py | 7 +- .../learn/python/learn/estimators/head.py | 20 ++- .../learn/python/learn/estimators/kmeans.py | 9 +- .../learn/python/learn/estimators/linear.py | 19 ++- .../learn/estimators/logistic_regressor.py | 10 +- .../python/learn/estimators/metric_key.py | 10 +- .../learn/python/learn/estimators/model_fn.py | 22 ++- .../python/learn/estimators/prediction_key.py | 8 +- .../python/learn/estimators/rnn_common.py | 7 +- .../python/learn/estimators/run_config.py | 19 ++- .../estimators/state_saving_rnn_estimator.py | 13 +- .../learn/python/learn/estimators/svm.py | 11 +- .../learn/estimators/tensor_signature.py | 11 +- .../python/learn/estimators/test_data.py | 7 +- .../contrib/learn/python/learn/evaluable.py | 11 +- 
.../contrib/learn/python/learn/experiment.py | 24 +-- .../learn/python/learn/export_strategy.py | 14 +- .../learn/python/learn/graph_actions.py | 8 +- .../learn/python/learn/learn_io/__init__.py | 7 +- .../learn/python/learn/learn_io/dask_io.py | 11 +- .../python/learn/learn_io/data_feeder.py | 29 +++- .../python/learn/learn_io/generator_io.py | 9 +- .../learn/python/learn/learn_io/graph_io.py | 16 +- .../learn/python/learn/learn_io/numpy_io.py | 9 +- .../learn/python/learn/learn_io/pandas_io.py | 12 +- .../learn/python/learn/learn_runner.py | 10 +- .../learn/python/learn/learn_runner_lib.py | 6 +- .../contrib/learn/python/learn/metric_spec.py | 13 +- .../contrib/learn/python/learn/models.py | 14 +- .../learn/python/learn/monitored_session.py | 6 +- .../contrib/learn/python/learn/monitors.py | 68 ++++++++- .../learn/python/learn/ops/__init__.py | 7 +- .../learn/python/learn/ops/embeddings_ops.py | 6 +- .../learn/python/learn/ops/losses_ops.py | 7 +- .../learn/python/learn/ops/seq2seq_ops.py | 12 +- .../python/learn/preprocessing/__init__.py | 7 +- .../python/learn/preprocessing/categorical.py | 15 +- .../preprocessing/categorical_vocabulary.py | 13 +- .../learn/python/learn/preprocessing/text.py | 26 +++- .../learn/python/learn/session_run_hook.py | 6 +- .../python/learn/summary_writer_cache.py | 5 +- .../contrib/learn/python/learn/trainable.py | 9 +- .../learn/python/learn/utils/__init__.py | 7 +- .../learn/python/learn/utils/export.py | 9 +- .../contrib/learn/python/learn/utils/gc.py | 13 +- .../python/learn/utils/input_fn_utils.py | 16 +- .../python/learn/utils/inspect_checkpoint.py | 2 +- .../learn/utils/saved_model_export_utils.py | 30 +++- tensorflow/python/util/decorator_utils.py | 2 +- 70 files changed, 945 insertions(+), 111 deletions(-) create mode 100644 tensorflow/contrib/learn/README.md diff --git a/tensorflow/contrib/framework/python/framework/experimental_test.py b/tensorflow/contrib/framework/python/framework/experimental_test.py index 
8e54e09e04..cfdc7df7d8 100644 --- a/tensorflow/contrib/framework/python/framework/experimental_test.py +++ b/tensorflow/contrib/framework/python/framework/experimental_test.py @@ -49,7 +49,6 @@ class ExperimentalTest(test.TestCase): "\nTHIS FUNCTION IS EXPERIMENTAL. It may change or " "be removed at any time, and without warning." "\n" - "\n" "\nArgs:" "\n arg0: Arg 0." "\n arg1: Arg 1." diff --git a/tensorflow/contrib/learn/README.md b/tensorflow/contrib/learn/README.md new file mode 100644 index 0000000000..d516bffc5e --- /dev/null +++ b/tensorflow/contrib/learn/README.md @@ -0,0 +1,143 @@ +EVERYTHING IN THIS DIRECTORY IS DEPRECATED. + +Using functions or classes will result in warnings. + +Instructions for converting to current alternatives are included in the +warnings. A high-level overview is below. + +## Canned Estimators + +Many canned estimators (subclasses of `Estimator`) have equivalents in core: +`DNNClassifier`, `DNNRegressor`, `DNNEstimator`, `LinearClassifier`, +`LinearRegressor`, `DNNLinearCombinedClassifier` and +`DNNLinearCombinedRegressor`. They are exposed under `tf.estimator`. +`DNNEstimator`, `LinearEstimator` and `DNNLinearCombinedEstimator` +are exposed under `tf.contrib.estimator`. + +To migrate to the new api, users need to take the following steps: + +* Replace `tf.contrib.learn` with `tf.estimator`. +* If you subclass any of the estimators, stop doing that. You should be able to + write a factory method that returns a canned estimator instead. If this is not + possible (if you override methods from the canned estimator), consider writing + a custom estimator instead. See `tf.estimator.Estimator`. +* Set `loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE` to preserve loss + reduction as the average over batch. +* Some optimizer-related arguments are no longer passed in the estimator + constructor. Instead, we provide methods that perform the same job by wrapping + an optimizer. 
Specifically: + * `gradient_clip_norm`: Use `tf.contrib.estimator.clip_gradients_by_norm` + * `embedding_lr_multipliers`: Not supported. + Other arguments: + * `input_layer_min_slice_size`: Replaced by `input_layer_partitioner` + * `enable_centered_bias`: Not supported. Dropping this argument is unlikely to + harm your model. + * `feature_engineering_fn`: Not supported. You can call your + `feature_engineering_fn` inside your input_fn: + ```python + def new_input_fn(): + features, labels = old_input_fn() + return feature_engineering_fn(features, labels) + ``` +* Use `tf.reshape` to reshape labels in your `input_fn`. `tf.estimator` + classifiers and regressors expect labels as a 2D Tensor of shape + `[batch_size, 1]`, or `[batch_size, n_labels]`. In contrast, + `tf.contrib.learn` classifiers and regressors supported labels with shape + `[batch_size]`. +* If you pass custom metrics from the `evaluate()` method call, use + `tf.contrib.estimator.add_metrics`. +* Replace your `serving_input_fn` with a `serving_input_receiver_fn`. + Note this should be entirely distinct from your training `input_fn`, so if you + previously had one `input_fn` with different "modes", you should now factor + that apart. Where the former returned either a simple `(features, labels)` + tuple or `InputFnOps`, you should now return a `ServingInputReceiver`. + If you were generating your `serving_input_fn` using the + `build_parsing_serving_input_fn` helper, you can simply drop in the + replacement `build_parsing_serving_input_receiver_fn`. + +Some remaining estimators/classes: + +* `DynamicRnnEstimator`: Consider a custom `model_fn`. +* `KMeansClustering`: Use `tf.contrib.factorization.KMeansClustering`. +* `LogisticRegressor`: Not supported. Instead, use `binary_classification_head` + with a custom `model_fn`, or with `DNNEstimator`. +* `StateSavingRnnEstimator`: Consider a custom `model_fn`. +* SVM: Consider a custom `model_fn`. 
+* `LinearComposableModel` and `DNNComposableModel`: Not supported. + Consider `tf.contrib.estimator.DNNEstimator`, or write a custom model_fn. +* `MetricSpec`: Deprecated. For adding custom metrics to canned Estimators, use + `tf.contrib.estimator.add_metrics`. + +## Estimator +`tf.contrib.learn.Estimator` is migrated to `tf.estimator.Estimator`. + +To migrate, users need to take the following steps: + +* Replace `tf.contrib.learn.Estimator` with `tf.estimator.Estimator`. +* If you pass a `config` argument to `Estimator`, this must be + `tf.estimator.RunConfig`. You may need to edit your code accordingly. +* Edit your `model_fn` to return `tf.estimator.EstimatorSpec`. Refer to + `EstimatorSpec` for documentation of specific fields. +* If your `model_fn` uses the `mode` argument, use `tf.estimator.ModeKeys`. + +Some related classes: +* `Evaluable`, `Trainable`: Not supported, merged into `tf.estimator.Estimator`. +* ExportStrategy: Replaced by `tf.estimator.Exporter`. + +## Head/MultiHead +These classes are now supported under `tf.contrib.estimator`, e.g. +`tf.contrib.estimator.multi_class_head` and `tf.contrib.estimator.multi_head`. + +Some differences: + +* `multi_class_head`: If you use `tf.contrib.learn.multi_class_head` with + `n_classes=2`, switch to `tf.contrib.estimator.binary_classification_head`. +* `loss_only_head`: Not supported. +* `poisson_regression_head`: Not supported (yet). +* `binary_svm_head`: Not supported (yet). +* `no_op_train_fn`: Replace it with `tf.no_op`. + +Some arguments are renamed, please refer to documentation. In addition: + +* `loss_fn`: Supported for `multi_label_head`. If you need it for other heads, + please open an issue. +* `metric_class_ids`: Not supported (yet). +* `enable_centered_bias`: Not supported. Dropping this argument is unlikely to + harm your model. +* `label_name`: Not needed in `tf.estimator`. If you don’t use `multi_head`, + drop this argument. 
If you use `multi_head`, refer to + `tf.contrib.estimator.multi_head` documentation. + +## Experiment Class - Distributed Training Tooling + +Switch to `tf.estimator.train_and_evaluate`. Some differences: + +* Most of the constructor arguments, like `train_input_fn`, `eval_input_fn`, + should be wrapped into `tf.estimator.TrainSpec` and `tf.estimator.EvalSpec`. +* Remove the `experiment_fn`. Instead, create the `Estimator`, + `train_spec` and `eval_spec`, then call `tf.estimator.train_and_evaluate` + directly. +* Inside `tf.estimator.EvalSpec`, the `exporter` field is the replacement + for `export_strategy`. To be precise, `tf.estimator.LatestExporter` is the + replacement for `tf.contrib.learn.make_export_strategy`. If you want to export + only at the end of training use `tf.estimator.FinalExporter`. +* If the `TF_CONFIG` environment variable is constructed manually, please read + the `train_and_evaluate` documentation for the new requirements (in + particular, the chief node and evaluator node). + +## Other Classes and Functions + +* `tf.contrib.learn.datasets` is deprecated. We are adding ready to use datasets + to tensorflow/models. Many smaller datasets are available from other sources, + such as scikits.learn. Some Python processing may have to be written, but this + is straightforward to implement using the standard modules. +* `tf.contrib.learn.preprocessing`: Deprecated. The python-only preprocessing + functions are not a good fit for TensorFlow. Please use `tf.data`, and + consider tensorflow/transform for more complex use cases. +* `tf.contrib.learn.models`: Not supported, use canned estimators instead. +* `tf.contrib.learn.monitors`: Implement `SessionRunHook` instead. Hook + implementations are in `tf.train`. +* `tf.contrib.learn.learn_io`: Use the methods in `tf.estimator.inputs`, such as + `tf.estimator.inputs.numpy_input_fn`. Some utility functions have no + equivalent, we encourage the use of `tf.data`.
+ diff --git a/tensorflow/contrib/learn/__init__.py b/tensorflow/contrib/learn/__init__.py index 3698af027e..79bd73faaf 100644 --- a/tensorflow/contrib/learn/__init__.py +++ b/tensorflow/contrib/learn/__init__.py @@ -13,8 +13,11 @@ # limitations under the License. # ============================================================================== -# TODO(ptucker,ipolosukhin): Improve descriptions. -"""High level API for learning. +"""High level API for learning (DEPRECATED). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. See the @{$python/contrib.learn} guide. diff --git a/tensorflow/contrib/learn/python/__init__.py b/tensorflow/contrib/learn/python/__init__.py index bbebd5ab97..df23aeb2c4 100644 --- a/tensorflow/contrib/learn/python/__init__.py +++ b/tensorflow/contrib/learn/python/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level API for learning with TensorFlow.""" +"""High level API for learning with TensorFlow (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/__init__.py b/tensorflow/contrib/learn/python/learn/__init__.py index cdc67c77d5..76e0e8ac8f 100644 --- a/tensorflow/contrib/learn/python/learn/__init__.py +++ b/tensorflow/contrib/learn/python/learn/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level API for learning with TensorFlow.""" +"""High level API for learning with TensorFlow (deprecated). 
+ +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py b/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py index 2284ec46e9..fed1c44d19 100644 --- a/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py +++ b/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py @@ -12,20 +12,47 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Some common SessionRunHook classes.""" +"""Some common SessionRunHook classes (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions.
+""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.training import basic_session_run_hooks +from tensorflow.python.util.deprecation import deprecated_alias # pylint: disable=invalid-name -LoggingTensorHook = basic_session_run_hooks.LoggingTensorHook -StopAtStepHook = basic_session_run_hooks.StopAtStepHook -CheckpointSaverHook = basic_session_run_hooks.CheckpointSaverHook -StepCounterHook = basic_session_run_hooks.StepCounterHook -NanLossDuringTrainingError = basic_session_run_hooks.NanLossDuringTrainingError -NanTensorHook = basic_session_run_hooks.NanTensorHook -SummarySaverHook = basic_session_run_hooks.SummarySaverHook +LoggingTensorHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.LoggingTensorHook', + 'tf.train.LoggingTensorHook', + basic_session_run_hooks.LoggingTensorHook) +StopAtStepHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.StopAtStepHook', + 'tf.train.StopAtStepHook', + basic_session_run_hooks.StopAtStepHook) +CheckpointSaverHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.CheckpointSaverHook', + 'tf.train.CheckpointSaverHook', + basic_session_run_hooks.CheckpointSaverHook) +StepCounterHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.StepCounterHook', + 'tf.train.StepCounterHook', + basic_session_run_hooks.StepCounterHook) +NanLossDuringTrainingError = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.NanLossDuringTrainingError', + 'tf.train.NanLossDuringTrainingError', + basic_session_run_hooks.NanLossDuringTrainingError) +NanTensorHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.NanTensorHook', + 'tf.train.NanTensorHook', + basic_session_run_hooks.NanTensorHook) +SummarySaverHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.SummarySaverHook', + 'tf.train.SummarySaverHook', + basic_session_run_hooks.SummarySaverHook) # pylint: 
enable=invalid-name diff --git a/tensorflow/contrib/learn/python/learn/datasets/__init__.py b/tensorflow/contrib/learn/python/learn/datasets/__init__.py index 7240b0de14..3c34712ac8 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/__init__.py +++ b/tensorflow/contrib/learn/python/learn/datasets/__init__.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Dataset utilities and synthetic/reference datasets.""" +"""Dataset utilities and synthetic/reference datasets (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -27,6 +32,7 @@ from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.contrib.learn.python.learn.datasets import mnist from tensorflow.contrib.learn.python.learn.datasets import synthetic from tensorflow.contrib.learn.python.learn.datasets import text_datasets +from tensorflow.python.util.deprecation import deprecated # Export load_iris and load_boston. load_iris = base.load_iris @@ -51,6 +57,7 @@ SYNTHETIC = { } +@deprecated(None, 'Please use tf.data.') def load_dataset(name, size='small', test_with_fake_data=False): """Loads dataset by name. @@ -73,8 +80,9 @@ def load_dataset(name, size='small', test_with_fake_data=False): return DATASETS[name]() +@deprecated(None, 'Please use tf.data.') def make_dataset(name, n_samples=100, noise=None, seed=42, *args, **kwargs): - """Creates binary synthetic datasets + """Creates binary synthetic datasets. 
Args: name: str, name of the dataset to generate diff --git a/tensorflow/contrib/learn/python/learn/datasets/base.py b/tensorflow/contrib/learn/python/learn/datasets/base.py index ca720ae5ed..3b5c9b97c0 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/base.py +++ b/tensorflow/contrib/learn/python/learn/datasets/base.py @@ -12,7 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Base utilities for loading datasets.""" + +"""Base utilities for loading datasets (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -29,11 +35,14 @@ import numpy as np from six.moves import urllib from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated + Dataset = collections.namedtuple('Dataset', ['data', 'target']) Datasets = collections.namedtuple('Datasets', ['train', 'validation', 'test']) +@deprecated(None, 'Use tf.data instead.') def load_csv_with_header(filename, target_dtype, features_dtype, @@ -53,6 +62,7 @@ def load_csv_with_header(filename, return Dataset(data=data, target=target) +@deprecated(None, 'Use tf.data instead.') def load_csv_without_header(filename, target_dtype, features_dtype, @@ -70,6 +80,7 @@ def load_csv_without_header(filename, return Dataset(data=data, target=target) +@deprecated(None, 'Use tf.data instead.') def shrink_csv(filename, ratio): """Create a smaller dataset of only 1/ratio of original data.""" filename_small = filename.replace('.', '_small.') @@ -84,6 +95,7 @@ def shrink_csv(filename, ratio): i += 1 +@deprecated(None, 'Use scikits.learn.datasets.') def load_iris(data_path=None): """Load Iris dataset. 
@@ -100,6 +112,7 @@ def load_iris(data_path=None): data_path, target_dtype=np.int, features_dtype=np.float) +@deprecated(None, 'Use scikits.learn.datasets.') def load_boston(data_path=None): """Load Boston housing dataset. @@ -116,7 +129,12 @@ def load_boston(data_path=None): data_path, target_dtype=np.float, features_dtype=np.float) -def retry(initial_delay, max_delay, factor=2.0, jitter=0.25, is_retriable=None): +@deprecated(None, 'Use the retry module or similar alternatives.') +def retry(initial_delay, + max_delay, + factor=2.0, + jitter=0.25, + is_retriable=None): """Simple decorator for wrapping retriable functions. Args: @@ -152,7 +170,7 @@ def retry(initial_delay, max_delay, factor=2.0, jitter=0.25, is_retriable=None): for delay in delays(): try: return fn(*args, **kwargs) - except Exception as e: # pylint: disable=broad-except) + except Exception as e: # pylint: disable=broad-except if is_retriable is None: continue @@ -176,11 +194,13 @@ def _is_retriable(e): return isinstance(e, IOError) and e.errno in _RETRIABLE_ERRNOS +@deprecated(None, 'Please use urllib or similar directly.') @retry(initial_delay=1.0, max_delay=16.0, is_retriable=_is_retriable) def urlretrieve_with_retry(url, filename=None): return urllib.request.urlretrieve(url, filename) +@deprecated(None, 'Please write your own downloading logic.') def maybe_download(filename, work_directory, source_url): """Download the data from source url, unless it's already here. diff --git a/tensorflow/contrib/learn/python/learn/datasets/mnist.py b/tensorflow/contrib/learn/python/learn/datasets/mnist.py index 37f9175015..abbb44c2f5 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/mnist.py +++ b/tensorflow/contrib/learn/python/learn/datasets/mnist.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Functions for downloading and reading MNIST data.""" +"""Functions for downloading and reading MNIST data (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -27,6 +32,7 @@ from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.python.framework import dtypes from tensorflow.python.framework import random_seed from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated # CVDF mirror of http://yann.lecun.com/exdb/mnist/ DEFAULT_SOURCE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/' @@ -37,6 +43,7 @@ def _read32(bytestream): return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] +@deprecated(None, 'Please use tf.data to implement this functionality.') def extract_images(f): """Extract the images into a 4D uint8 numpy array [index, y, x, depth]. @@ -65,6 +72,7 @@ def extract_images(f): return data +@deprecated(None, 'Please use tf.one_hot on tensors.') def dense_to_one_hot(labels_dense, num_classes): """Convert class labels from scalars to one-hot vectors.""" num_labels = labels_dense.shape[0] @@ -74,6 +82,7 @@ def dense_to_one_hot(labels_dense, num_classes): return labels_one_hot +@deprecated(None, 'Please use tf.data to implement this functionality.') def extract_labels(f, one_hot=False, num_classes=10): """Extract the labels into a 1D uint8 numpy array [index]. @@ -103,7 +112,15 @@ def extract_labels(f, one_hot=False, num_classes=10): class DataSet(object): + """Container class for a dataset (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ + @deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def __init__(self, images, labels, @@ -210,6 +227,8 @@ class DataSet(object): return self._images[start:end], self._labels[start:end] +@deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def read_data_sets(train_dir, fake_data=False, one_hot=False, @@ -275,5 +294,7 @@ def read_data_sets(train_dir, return base.Datasets(train=train, validation=validation, test=test) +@deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def load_mnist(train_dir='MNIST-data'): return read_data_sets(train_dir) diff --git a/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py b/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py index 6e0ba38941..a4848fa64a 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py +++ b/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Produce DBpedia datasets of a smaller size.""" +"""Produce DBpedia datasets of a smaller size (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py index 9a843168c2..6a0e3350b3 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py +++ b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Synthetic dataset generators.""" +"""Synthetic dataset generators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -21,8 +26,10 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.learn.python.learn.datasets.base import Dataset +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Consider using synthetic datasets from scikits.learn.') def circles(n_samples=100, noise=None, seed=None, @@ -93,6 +100,7 @@ def circles(n_samples=100, return Dataset(data=X[indices], target=y[indices]) +@deprecated(None, 'Consider using synthetic datasets from scikits.learn.') def spirals(n_samples=100, noise=None, seed=None, diff --git a/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py b/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py index 2596a2ecaf..ce94663017 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py +++ b/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Text datasets.""" +"""Text datasets (deprecated). 
+ +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -26,10 +31,12 @@ import numpy as np from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated DBPEDIA_URL = 'https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz' +@deprecated(None, 'See contrib/learn/README.md') def maybe_download_dbpedia(data_dir): """Download if DBpedia data is not present.""" train_path = os.path.join(data_dir, 'dbpedia_csv/train.csv') @@ -41,6 +48,7 @@ def maybe_download_dbpedia(data_dir): tfile.extractall(data_dir) +@deprecated(None, 'See contrib/learn/README.md') def load_dbpedia(size='small', test_with_fake_data=False): """Get DBpedia datasets from CSV files.""" if not test_with_fake_data: diff --git a/tensorflow/contrib/learn/python/learn/estimators/__init__.py b/tensorflow/contrib/learn/python/learn/estimators/__init__.py index 4981750c94..3e64595f31 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/__init__.py +++ b/tensorflow/contrib/learn/python/learn/estimators/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""An estimator is a rule for calculating an estimate of a given quantity. +"""An estimator is a rule for calculating an estimate of a given quantity (deprecated). + +These classes are deprecated and replaced with `tf.estimator`. + +See [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
# Estimators diff --git a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py index 15277415a1..1f0e4663d0 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== -"""sklearn cross-support.""" +"""sklearn cross-support (deprecated).""" from __future__ import absolute_import from __future__ import division @@ -132,6 +132,8 @@ class _TransformerMixin(): class NotFittedError(ValueError, AttributeError): """Exception class to raise if estimator is used before fitting. + USE OF THIS EXCEPTION IS DEPRECATED. + This class inherits from both ValueError and AttributeError to help with exception handling and backward compatibility. diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model.py index a02c726c74..1fa58271e2 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/composable_model.py +++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TensorFlow composable models used as building blocks for estimators.""" +"""TensorFlow composable models used as building blocks for estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -34,6 +39,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.summary import summary +from tensorflow.python.util.deprecation import deprecated class _ComposableModel(object): @@ -46,6 +52,7 @@ class _ComposableModel(object): _ComposableModel and its subclasses are not part of the public tf.learn API. """ + @deprecated(None, "Please use model_fns in tf.estimator.") def __init__(self, num_label_columns, optimizer, @@ -141,6 +148,10 @@ class _ComposableModel(object): class LinearComposableModel(_ComposableModel): """A _ComposableModel that implements linear regression. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Instances of this class can be used to build estimators through the use of composition. """ @@ -252,6 +263,10 @@ class LinearComposableModel(_ComposableModel): class DNNComposableModel(_ComposableModel): """A _ComposableModel that implements a DNN. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Instances of this class can be used to build estimators through the use of composition. """ diff --git a/tensorflow/contrib/learn/python/learn/estimators/constants.py b/tensorflow/contrib/learn/python/learn/estimators/constants.py index fc69e81024..d2548946bc 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/constants.py +++ b/tensorflow/contrib/learn/python/learn/estimators/constants.py @@ -13,9 +13,11 @@ # limitations under the License. # ============================================================================== -"""Constants regarding Estimators. +"""Constants regarding Estimators (deprecated). 
-This file is obsoleted in the move of Estimator to core. +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. """ from __future__ import absolute_import from __future__ import division @@ -25,6 +27,8 @@ from __future__ import print_function class ProblemType(object): """Enum-like values for the type of problem that the model solves. + THIS CLASS IS DEPRECATED. + These values are used when exporting the model to produce the appropriate signature function for serving. diff --git a/tensorflow/contrib/learn/python/learn/estimators/debug.py b/tensorflow/contrib/learn/python/learn/estimators/debug.py index 9d5f6c2bf9..24b067b7e3 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/debug.py +++ b/tensorflow/contrib/learn/python/learn/estimators/debug.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Debug estimators. +"""Debug estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Debug estimators are bias-only estimators that can be used for debugging and as simple baselines. @@ -118,6 +122,10 @@ def debug_model_fn(features, labels, mode, params, config=None): class DebugClassifier(estimator.Estimator): """A classifier for TensorFlow Debug models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -237,6 +245,10 @@ class DebugClassifier(estimator.Estimator): class DebugRegressor(estimator.Estimator): """A regressor for TensorFlow Debug models. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py index c17b41c0f7..eabebb7e88 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Deep Neural Network estimators.""" +"""Deep Neural Network estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -212,6 +217,10 @@ def _dnn_model_fn(features, labels, mode, params, config=None): class DNNClassifier(estimator.Estimator): """A classifier for TensorFlow DNN models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -521,6 +530,10 @@ class DNNClassifier(estimator.Estimator): class DNNRegressor(estimator.Estimator): """A regressor for TensorFlow DNN models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -796,6 +809,10 @@ class DNNRegressor(estimator.Estimator): class DNNEstimator(estimator.Estimator): """A Estimator for TensorFlow DNN models with user specified _Head. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py index 7266122350..3d85533d92 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow estimators for Linear and DNN joined training models.""" +"""TensorFlow estimators for Linear and DNN joined training models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -372,6 +377,10 @@ def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None): class DNNLinearCombinedEstimator(estimator.Estimator): """An estimator for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. @@ -490,6 +499,10 @@ class DNNLinearCombinedEstimator(estimator.Estimator): class DNNLinearCombinedClassifier(estimator.Estimator): """A classifier for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. 
@@ -832,6 +845,10 @@ class DNNLinearCombinedClassifier(estimator.Estimator): class DNNLinearCombinedRegressor(estimator.Estimator): """A regressor for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py index 69440e823e..a703dc66e9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Estimator for Dynamic RNNs.""" +"""Estimator for Dynamic RNNs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -540,6 +545,12 @@ def _get_dynamic_rnn_model_fn( class DynamicRnnEstimator(estimator.Estimator): + """Dynamically unrolled RNN (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ def __init__(self, problem_type, diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 4b63e08ab3..5262e04e16 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Base Estimator class.""" +"""Base Estimator class (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -138,6 +143,7 @@ def _get_input_fn(x, y, input_fn, feed_fn, batch_size, shuffle=False, epochs=1): return df.input_builder, df.get_feed_dict_fn() +@deprecated(None, 'Please specify feature columns explicitly.') def infer_real_valued_columns_from_input_fn(input_fn): """Creates `FeatureColumn` objects for inputs defined by `input_fn`. @@ -158,6 +164,7 @@ def infer_real_valued_columns_from_input_fn(input_fn): return layers.infer_real_valued_columns(features) +@deprecated(None, 'Please specify feature columns explicitly.') def infer_real_valued_columns_from_input(x): """Creates `FeatureColumn` objects for inputs defined by input `x`. @@ -389,6 +396,10 @@ class BaseEstimator(sklearn.BaseEstimator, evaluable.Evaluable, trainable.Trainable): """Abstract BaseEstimator class to train and evaluate TensorFlow models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Users should not instantiate or subclass this class. Instead, use an `Estimator`. 
""" @@ -399,6 +410,8 @@ class BaseEstimator(sklearn.BaseEstimator, evaluable.Evaluable, # TODO(wicke): Remove this once launcher takes over config functionality _Config = run_config.RunConfig # pylint: disable=invalid-name + @deprecated(None, 'Please replace uses of any Estimator from tf.contrib.learn' + ' with an Estimator from tf.estimator.*') def __init__(self, model_dir=None, config=None): """Initializes a BaseEstimator instance. @@ -1074,6 +1087,10 @@ def _identity_feature_engineering_fn(features, labels): class Estimator(BaseEstimator): """Estimator class is the basic TensorFlow model trainer/evaluator. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. """ def __init__(self, @@ -1458,8 +1475,14 @@ class Estimator(BaseEstimator): # For time of deprecation x,y from Estimator allow direct access. # pylint: disable=protected-access class SKCompat(sklearn.BaseEstimator): - """Scikit learn wrapper for TensorFlow Learn Estimator.""" + """Scikit learn wrapper for TensorFlow Learn Estimator. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please switch to the Estimator interface.') def __init__(self, estimator): self._estimator = estimator diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py index fd47710e30..e4c31396ba 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Utils for Estimator.""" +"""Utils for Estimator (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 9b124b2c19..2b4b6eff39 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Abstractions for the head(s) of a model. +"""Abstractions for the head(s) of a model (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. """ + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -47,11 +52,16 @@ from tensorflow.python.summary import summary from tensorflow.python.training import training from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated class Head(object): """Interface for the head/top of a model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Given logits (or output of a hidden layer), a Head knows how to compute predictions, loss, default metric and export signature. 
It is meant to, @@ -177,6 +187,7 @@ class Head(object): raise NotImplementedError("Calling an abstract method.") +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def regression_head(label_name=None, weight_column_name=None, label_dimension=1, @@ -216,6 +227,7 @@ def regression_head(label_name=None, link_fn=(link_fn if link_fn is not None else array_ops.identity)) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def poisson_regression_head(label_name=None, weight_column_name=None, label_dimension=1, @@ -254,6 +266,7 @@ def poisson_regression_head(label_name=None, # TODO(zakaria): Consider adding a _RegressionHead for logistic_regression +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_class_head(n_classes, label_name=None, weight_column_name=None, @@ -335,6 +348,7 @@ def multi_class_head(n_classes, label_keys=label_keys) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def binary_svm_head( label_name=None, weight_column_name=None, @@ -370,6 +384,7 @@ def binary_svm_head( thresholds=thresholds) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_label_head(n_classes, label_name=None, weight_column_name=None, @@ -430,6 +445,7 @@ def multi_label_head(n_classes, loss_fn=_wrap_custom_loss_fn(loss_fn) if loss_fn else None) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def loss_only_head(loss_fn, head_name=None): """Creates a Head that contains only loss terms. @@ -447,6 +463,7 @@ def loss_only_head(loss_fn, head_name=None): return _LossOnlyHead(loss_fn, head_name=head_name) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_head(heads, loss_weights=None): """Creates a MultiHead stemming from same logits/hidden layer. 
@@ -479,6 +496,7 @@ def multi_head(heads, loss_weights=None): return _MultiHead(heads, loss_merger=_weighted_loss_merger) +@deprecated(None, "Use 'lambda _: tf.no_op()'.") def no_op_train_fn(loss): del loss return control_flow_ops.no_op() diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py index 8f9d6fc318..66ebcfd1d8 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py +++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implementation of k-means clustering on top of `Estimator` API. +"""Implementation of k-means clustering on top of `Estimator` API (deprecated). This module is deprecated. Please use @{tf.contrib.factorization.KMeansClustering} instead of @@ -153,7 +153,12 @@ def _kmeans_clustering_model_fn(features, labels, mode, params, config): # TODO(agarwal,ands): support sharded input. class KMeansClustering(estimator.Estimator): - """An Estimator for K-Means clustering.""" + """An Estimator for K-Means clustering. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ SQUARED_EUCLIDEAN_DISTANCE = clustering_ops.SQUARED_EUCLIDEAN_DISTANCE COSINE_DISTANCE = clustering_ops.COSINE_DISTANCE RANDOM_INIT = clustering_ops.RANDOM_INIT diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index 37aa8b3396..64d7ecc68e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -13,7 +13,12 @@ # limitations under the License. 
# ============================================================================== -"""Linear Estimators.""" +"""Linear Estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -305,6 +310,10 @@ class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook): class LinearClassifier(estimator.Estimator): """Linear classifier model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a linear model to classify instances into one of multiple possible classes. When number of possible classes is 2, this is binary classification. @@ -625,6 +634,10 @@ class LinearClassifier(estimator.Estimator): class LinearRegressor(estimator.Estimator): """Linear regressor model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a linear regression model to predict label value given observation of feature values. @@ -860,6 +873,10 @@ class LinearRegressor(estimator.Estimator): class LinearEstimator(estimator.Estimator): """Linear model with user specified head. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a generalized linear model to predict label value given observation of feature values. 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py index fb339160d5..3cbcc6e98d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py +++ b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Logistic regression (aka binary classifier) class. +"""Logistic regression (aka binary classifier) class (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This defines some useful basic metrics for using logistic regression to classify a binary event (0 vs 1). @@ -75,6 +79,10 @@ def LogisticRegressor( # pylint: disable=invalid-name feature_engineering_fn=None): """Builds a logistic regression Estimator for binary classification. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This method provides a basic Estimator with some additional metrics for custom binary classification models, including AUC, precision/recall and accuracy. diff --git a/tensorflow/contrib/learn/python/learn/estimators/metric_key.py b/tensorflow/contrib/learn/python/learn/estimators/metric_key.py index 99388f116b..f264248e44 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/metric_key.py +++ b/tensorflow/contrib/learn/python/learn/estimators/metric_key.py @@ -12,14 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Enum for metric keys.""" +"""Enum for metric keys (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function class MetricKey(object): - """Metric key strings.""" + """Metric key strings (deprecated).""" + LOSS = "loss" AUC = "auc" AUC_PR = "auc_precision_recall" diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py index 44e6c7c52d..dcb161180c 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Classes and methods related to model_fn.""" +"""Classes and methods related to model_fn (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -37,10 +42,13 @@ from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants from tensorflow.python.training import session_run_hook +from tensorflow.python.util.deprecation import deprecated class ModeKeys(object): - """Standard names for model modes. + """Standard names for model modes (deprecated). + + THIS CLASS IS DEPRECATED. 
The following standard keys are defined: @@ -65,8 +73,16 @@ class ModelFnOps( 'output_alternatives', 'training_chief_hooks', 'training_hooks', 'scaffold', 'mode' ])): - """Ops returned from a model_fn.""" + """Ops returned from a model_fn. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'When switching to tf.estimator.Estimator, use ' + 'tf.estimator.EstimatorSpec. You can use the `estimator_spec`' + ' method to create an equivalent one.') def __new__(cls, mode, predictions=None, diff --git a/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py b/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py index f8d87b8914..6fd2fc9d59 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py +++ b/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Enum for model prediction keys. +"""Enum for model prediction keys (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This file is obsoleted in the move of Estimator to core. 
""" @@ -22,6 +26,8 @@ from __future__ import print_function class PredictionKey(object): + """THIS CLASS IS DEPRECATED.""" + CLASSES = "classes" PROBABILITIES = "probabilities" LOGITS = "logits" diff --git a/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py b/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py index 2752bc2d90..215022e5d9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py +++ b/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Common operations for RNN Estimators.""" +"""Common operations for RNN Estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py index fd90fd1cc6..1d161093de 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Run Config.""" +"""Run Config (deprecated, use tf.estimator.RunConfig instead). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -29,11 +34,12 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as core_run_config from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib +from tensorflow.python.util.deprecation import deprecated # A list of the property names in RunConfig user allows to change. They will # not affect the execution framework, so when execution framework checks the -# `uid` of the RunConfig, it should be ingored. +# `uid` of the RunConfig, it should be ignored. _DEFAULT_UID_WHITE_LIST = [ 'tf_random_seed', 'save_summary_steps', @@ -47,6 +53,7 @@ _DEFAULT_UID_WHITE_LIST = [ class Environment(object): + """DEPRECATED CLASS.""" # For running general distributed training. CLOUD = 'cloud' # For running Google-internal distributed training. @@ -56,6 +63,7 @@ class Environment(object): class TaskType(object): + """DEPRECATED CLASS.""" MASTER = 'master' PS = 'ps' WORKER = 'worker' @@ -64,6 +72,8 @@ class TaskType(object): class ClusterConfig(object): """This class specifies the configurations for a distributed run. + THIS CLASS IS DEPRECATED. Use tf.estimator.RunConfig instead. + If you're using an `Estimator`, you should probably use the subclass RunConfig instead. """ @@ -211,10 +221,13 @@ class ClusterConfig(object): class RunConfig(ClusterConfig, core_run_config.RunConfig): """This class specifies the configurations for an `Estimator` run. - This class is the implementation of @{tf.estimator.RunConfig} interface. + This class is a deprecated implementation of @{tf.estimator.RunConfig} + interface. 
""" _USE_DEFAULT = 0 + @deprecated(None, 'When switching to tf.estimator.Estimator, use' + ' tf.estimator.RunConfig instead.') def __init__(self, master=None, num_cores=0, diff --git a/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py index 0cea35e219..de78c72c3a 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Estimator for State Saving RNNs.""" +"""Estimator for State Saving RNNs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -528,6 +533,12 @@ def _get_rnn_model_fn(cell_type, class StateSavingRnnEstimator(estimator.Estimator): + """RNN with static unrolling and state saving (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, problem_type, diff --git a/tensorflow/contrib/learn/python/learn/estimators/svm.py b/tensorflow/contrib/learn/python/learn/estimators/svm.py index 72920d73c0..3459997bab 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/svm.py +++ b/tensorflow/contrib/learn/python/learn/estimators/svm.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Support Vector Machine (SVM) Estimator.""" +"""Support Vector Machine (SVM) Estimator (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -36,6 +41,10 @@ def _as_iterable(preds, output): class SVM(estimator.Estimator): """Support Vector Machine (SVM) model for binary classification. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Currently, only linear SVMs are supported. For the underlying optimization problem, the `SDCAOptimizer` is used. For performance and convergence tuning, the num_loss_partitions parameter passed to `SDCAOptimizer` (see `__init__()` diff --git a/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py b/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py index a120bc6cc3..71b5658dd1 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py +++ b/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorSignature class and utilities.""" +"""TensorSignature class and utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -33,6 +38,10 @@ class TensorSignature(collections.namedtuple( "TensorSignature", ["dtype", "shape", "is_sparse"])): """Signature of the `Tensor` object. 
+ THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Useful to check compatibility of tensors. Example: diff --git a/tensorflow/contrib/learn/python/learn/estimators/test_data.py b/tensorflow/contrib/learn/python/learn/estimators/test_data.py index ed201bfc58..e4b057b4f5 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/test_data.py +++ b/tensorflow/contrib/learn/python/learn/estimators/test_data.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Test data utilities.""" +"""Test data utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/evaluable.py b/tensorflow/contrib/learn/python/learn/evaluable.py index 8f6cd39864..10881ca885 100644 --- a/tensorflow/contrib/learn/python/learn/evaluable.py +++ b/tensorflow/contrib/learn/python/learn/evaluable.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""`Evaluable` interface.""" +"""`Evaluable` interface (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -23,6 +28,10 @@ import abc class Evaluable(object): """Interface for objects that are evaluatable by, e.g., `Experiment`. 
+ + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index 331bc11549..9a7c4cd685 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experiment class collecting information needed for a single training run.""" +"""Experiment class collecting information for a single training run (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -25,7 +30,6 @@ import os import time from tensorflow.contrib.framework import deprecated -from tensorflow.contrib.framework import deprecated_args from tensorflow.contrib.framework.python.framework import experimental from tensorflow.contrib.learn.python.learn import evaluable from tensorflow.contrib.learn.python.learn import export_strategy @@ -118,6 +122,10 @@ class _EvalAndExportListener(basic_session_run_hooks.CheckpointSaverListener): class Experiment(object): """Experiment is a class containing all information needed to train a model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ After an experiment is created (by passing an Estimator and inputs for training and evaluation), an Experiment instance knows how to invoke training and eval loops in a sensible fashion for distributed training. @@ -125,16 +133,8 @@ class Experiment(object): # TODO(ispir): remove delay_workers_by_global_step and make global step based # waiting as only behavior. - @deprecated_args( - "2016-10-23", - "local_eval_frequency is deprecated as local_run will be renamed to " - "train_and_evaluate. Use min_eval_frequency and call train_and_evaluate " - "instead. Note, however, that the default for min_eval_frequency is 1, " - "meaning models will be evaluated every time a new checkpoint is " - "available. In contrast, the default for local_eval_frequency is None, " - "resulting in evaluation occurring only after training has completed. " - "min_eval_frequency is ignored when calling the deprecated local_run.", - "local_eval_frequency") + @deprecated(None, "Please switch to tf.estimator.train_and_evaluate. You will" + " also have to convert to a tf.estimator.Estimator.") def __init__(self, estimator, train_input_fn, diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py index 55a8b82431..075cab536e 100644 --- a/tensorflow/contrib/learn/python/learn/export_strategy.py +++ b/tensorflow/contrib/learn/python/learn/export_strategy.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""ExportStrategy class represents different flavors of model export.""" +"""ExportStrategy class represents different flavors of model export (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -21,6 +26,7 @@ from __future__ import print_function import collections from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated __all__ = ['ExportStrategy'] @@ -30,6 +36,10 @@ class ExportStrategy( ['name', 'export_fn', 'strip_default_attrs'])): """A class representing a type of model export. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Typically constructed by a utility function specific to the exporter, such as `saved_model_export_utils.make_export_strategy()`. @@ -56,6 +66,8 @@ class ExportStrategy( forward compatibility of the resulting `SavedModel`. """ + @deprecated(None, 'Please switch to tf.estimator.train_and_evaluate, and use ' + 'tf.estimator.Exporter.') def __new__(cls, name, export_fn, strip_default_attrs=None): return super(ExportStrategy, cls).__new__( cls, name, export_fn, strip_default_attrs) diff --git a/tensorflow/contrib/learn/python/learn/graph_actions.py b/tensorflow/contrib/learn/python/learn/graph_actions.py index 98365c05f6..a997fab723 100644 --- a/tensorflow/contrib/learn/python/learn/graph_actions.py +++ b/tensorflow/contrib/learn/python/learn/graph_actions.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level operations on graphs.""" +"""High level operations on graphs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -68,6 +73,7 @@ def clear_summary_writers(): return summary_io.SummaryWriterCache.clear() +@deprecated(None, 'Use `SummaryWriterCache.get` directly.') def get_summary_writer(logdir): """Returns single SummaryWriter per logdir in current run. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/__init__.py b/tensorflow/contrib/learn/python/learn/learn_io/__init__.py index 06c3782a47..8b133a4440 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/__init__.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/__init__.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tools to allow different io formats.""" +"""Tools to allow different io formats (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py b/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py index 7d666391ce..e0a1948d95 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Methods to allow dask.DataFrame.""" +"""Methods to allow dask.DataFrame (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -21,6 +26,8 @@ from __future__ import print_function import numpy as np +from tensorflow.python.util.deprecation import deprecated + try: # pylint: disable=g-import-not-at-top import dask.dataframe as dd @@ -60,6 +67,7 @@ def _construct_dask_df_with_divisions(df): return dd.Series(merge(dsk, df.dask), name, df.name, divisions) +@deprecated(None, 'Please feed input to tf.data to support dask.') def extract_dask_data(data): """Extract data from dask.Series or dask.DataFrame for predictors. @@ -81,6 +89,7 @@ def extract_dask_data(data): return data +@deprecated(None, 'Please feed input to tf.data to support dask.') def extract_dask_labels(labels): """Extract data from dask.Series or dask.DataFrame for labels. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index 96be8b1bc4..c45b1d1864 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implementations of different data feeders to provide data for TF trainer.""" +"""Implementations of different data feeders to provide data for TF trainer (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" # TODO(ipolosukhin): Replace this module with feed-dict queue runners & queues. 
@@ -31,6 +36,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.deprecation import deprecated # pylint: disable=g-multiple-import,g-bad-import-order from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels @@ -101,6 +107,7 @@ def _is_iterable(x): return hasattr(x, 'next') or hasattr(x, '__next__') +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_train_data_feeder(x, y, n_classes, @@ -188,6 +195,7 @@ def _batch_data(x, batch_size=None): yield np.matrix(chunk) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_predict_data_feeder(x, batch_size=None): """Returns an iterable for feeding into predict step. @@ -219,6 +227,7 @@ def setup_predict_data_feeder(x, batch_size=None): return [x] +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_processor_data_feeder(x): """Sets up processor iterable. @@ -233,6 +242,7 @@ def setup_processor_data_feeder(x): return x +@deprecated(None, 'Please convert numpy dtypes explicitly.') def check_array(array, dtype): """Checks array on dtype and converts it if different. @@ -275,8 +285,14 @@ def _check_dtype(dtype): class DataFeeder(object): - """Data feeder is an example class to sample data for TF trainer.""" + """Data feeder is an example class to sample data for TF trainer. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, x, y, @@ -563,6 +579,10 @@ class DataFeeder(object): class StreamingDataFeeder(DataFeeder): """Data feeder for TF trainer that reads data from iterator. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Streaming data feeder allows to read data as it comes it from disk or somewhere else. It's custom to have this iterators rotate infinetly over the dataset, to allow control of how much to learn on the trainer side. @@ -771,11 +791,16 @@ class StreamingDataFeeder(DataFeeder): class DaskDataFeeder(object): """Data feeder for that reads data from dask.Series and dask.DataFrame. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Numpy arrays can be serialized to disk and it's possible to do random seeks into them. DaskDataFeeder will remove requirement to have full dataset in the memory and still do random seeks for sampling of batches. """ + @deprecated(None, 'Please feed input to tf.data to support dask.') def __init__(self, x, y, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py b/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py index 884faf8335..f8aaa0c9e3 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Methods to allow generator of dict with numpy arrays.""" +"""Methods to allow generator of dict with numpy arrays (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -23,8 +28,10 @@ from types import FunctionType from types import GeneratorType from tensorflow.python.estimator.inputs.queues.feeding_functions import _enqueue_data as enqueue_data +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Please use tf.data.') def generator_input_fn(x, target_key=None, batch_size=128, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py index 3a46c23968..9e816f54b6 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Methods to read data in the graph.""" +"""Methods to read data in the graph (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -34,11 +39,13 @@ from tensorflow.python.platform import gfile from tensorflow.python.summary import summary from tensorflow.python.training import input as input_ops from tensorflow.python.training import queue_runner +from tensorflow.python.util.deprecation import deprecated # Default name for key in the feature dict. 
KEY_FEATURE_NAME = '__key__' +@deprecated(None, 'Use tf.data.') def read_batch_examples(file_pattern, batch_size, reader, @@ -106,6 +113,7 @@ def read_batch_examples(file_pattern, return examples +@deprecated(None, 'Use tf.data.') def read_keyed_batch_examples(file_pattern, batch_size, reader, @@ -175,6 +183,7 @@ def read_keyed_batch_examples(file_pattern, seed=seed) +@deprecated(None, 'Use tf.data.') def read_keyed_batch_examples_shared_queue(file_pattern, batch_size, reader, @@ -452,6 +461,7 @@ def _read_keyed_batch_examples_helper(file_pattern, return queued_examples_with_keys +@deprecated(None, 'Use tf.data.') def read_keyed_batch_features(file_pattern, batch_size, features, @@ -540,6 +550,7 @@ def read_keyed_batch_features(file_pattern, name=scope) +@deprecated(None, 'Use tf.data.') def read_keyed_batch_features_shared_queue(file_pattern, batch_size, features, @@ -620,6 +631,7 @@ def read_keyed_batch_features_shared_queue(file_pattern, name=scope) +@deprecated(None, 'Use tf.data.') def queue_parsed_features(parsed_features, keys=None, feature_queue_capacity=100, @@ -742,6 +754,7 @@ def queue_parsed_features(parsed_features, return dequeued_keys, dequeued_parsed_features +@deprecated(None, 'Use tf.data.') def read_batch_features(file_pattern, batch_size, features, @@ -821,6 +834,7 @@ def read_batch_features(file_pattern, return features +@deprecated(None, 'Use tf.data.') def read_batch_record_features(file_pattern, batch_size, features, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py b/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py index 692438807f..29552d24f1 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py @@ -12,15 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Methods to allow dict of numpy arrays.""" +"""Methods to allow dict of numpy arrays (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.estimator.inputs.numpy_io import numpy_input_fn as core_numpy_input_fn +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Use tf.estimator.inputs.numpy_input_fn.') def numpy_input_fn(x, y=None, batch_size=128, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py b/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py index ede7558eaf..b4ef055f5a 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py @@ -13,13 +13,19 @@ # limitations under the License. # ============================================================================== -"""Methods to allow pandas.DataFrame.""" +"""Methods to allow pandas.DataFrame (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.estimator.inputs.pandas_io import pandas_input_fn as core_pandas_input_fn +from tensorflow.python.util.deprecation import deprecated try: # pylint: disable=g-import-not-at-top @@ -47,6 +53,7 @@ PANDAS_DTYPES = { } +@deprecated(None, 'Please use tf.estimator.inputs.pandas_input_fn') def pandas_input_fn(x, y=None, batch_size=128, @@ -66,6 +73,7 @@ def pandas_input_fn(x, target_column=target_column) +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_data(data): """Extract data from pandas.DataFrame for predictors. @@ -96,6 +104,7 @@ def extract_pandas_data(data): 'float, or bool. Found: ' + ', '.join(error_report)) +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_matrix(data): """Extracts numpy matrix from pandas DataFrame. @@ -111,6 +120,7 @@ def extract_pandas_matrix(data): return data.as_matrix() +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_labels(labels): """Extract data from pandas.DataFrame for labels. diff --git a/tensorflow/contrib/learn/python/learn/learn_runner.py b/tensorflow/contrib/learn/python/learn/learn_runner.py index 2af723a0d6..d719a3e488 100644 --- a/tensorflow/contrib/learn/python/learn/learn_runner.py +++ b/tensorflow/contrib/learn/python/learn/learn_runner.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Runs an Experiment.""" +"""Runs an Experiment (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,7 @@ from tensorflow.contrib.learn.python.learn.estimators import run_config as run_c from tensorflow.contrib.learn.python.learn.experiment import Experiment from tensorflow.contrib.training.python.training import hparam as hparam_lib from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.deprecation import deprecated # TODO(xiejw): Refactor the learn_runner to make code reusable. @@ -99,6 +105,7 @@ def _wrapped_experiment_fn_with_uid_check(experiment_fn, require_hparams=False): return wrapped_experiment_fn +@deprecated(None, 'Use tf.estimator.train_and_evaluate.') def run(experiment_fn, output_dir=None, schedule=None, run_config=None, hparams=None): """Make and run an experiment. @@ -218,6 +225,7 @@ def run(experiment_fn, output_dir=None, schedule=None, run_config=None, return _execute_schedule(experiment, schedule) +@deprecated(None, 'Use tf.estimator.train_and_evaluate.') def tune(experiment_fn, tuner): """Tune an experiment with hyper-parameters. diff --git a/tensorflow/contrib/learn/python/learn/learn_runner_lib.py b/tensorflow/contrib/learn/python/learn/learn_runner_lib.py index 7d9b1c7716..ba2d067787 100644 --- a/tensorflow/contrib/learn/python/learn/learn_runner_lib.py +++ b/tensorflow/contrib/learn/python/learn/learn_runner_lib.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities to run and tune an Experiment. +"""Utilities to run and tune an Experiment (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
@@run @@tune diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py index 6440bc204b..97220365d5 100644 --- a/tensorflow/contrib/learn/python/learn/metric_spec.py +++ b/tensorflow/contrib/learn/python/learn/metric_spec.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""The metric spec class to flexibly connect models and metrics.""" +"""The metric spec class to flexibly connect models and metrics (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,7 @@ import six from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated def _assert_named_args(sentinel): @@ -223,6 +229,10 @@ def _adapt_metric_fn( class MetricSpec(object): """MetricSpec connects a model to metric functions. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + The MetricSpec class contains all information necessary to connect the output of a `model_fn` to the metrics (usually, streaming metrics) that are used in evaluation. 
@@ -284,6 +294,7 @@ class MetricSpec(object): """ + @deprecated(None, 'Use tf.estimator.EstimatorSpec.eval_metric_ops.') def __init__(self, metric_fn, prediction_key=None, diff --git a/tensorflow/contrib/learn/python/learn/models.py b/tensorflow/contrib/learn/python/learn/models.py index 4283240d01..bd4bbf9f8c 100644 --- a/tensorflow/contrib/learn/python/learn/models.py +++ b/tensorflow/contrib/learn/python/learn/models.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Various high level TF models.""" +"""Various high level TF models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -28,8 +33,10 @@ from tensorflow.python.ops import array_ops as array_ops_ from tensorflow.python.ops import init_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.summary import summary +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Consider using a tf.estimator.LinearRegressor') def linear_regression_zero_init(x, y): """Linear regression subgraph with zero-value initial weights and bias. @@ -43,6 +50,7 @@ def linear_regression_zero_init(x, y): return linear_regression(x, y, init_mean=0.0, init_stddev=0.0) +@deprecated(None, 'Consider using a class from tf.estimator.LinearClassifier') def logistic_regression_zero_init(x, y): """Logistic regression subgraph with zero-value initial weights and bias. 
@@ -56,6 +64,7 @@ def logistic_regression_zero_init(x, y): return logistic_regression(x, y, init_mean=0.0, init_stddev=0.0) +@deprecated(None, 'Consider using a class from tf.estimator.') def linear_regression(x, y, init_mean=None, init_stddev=1.0): """Creates linear regression TensorFlow subgraph. @@ -107,6 +116,7 @@ def linear_regression(x, y, init_mean=None, init_stddev=1.0): return losses_ops.mean_squared_error_regressor(x, y, weights, bias) +@deprecated(None, 'Consider using a class from tf.estimator.') def logistic_regression(x, y, class_weight=None, @@ -203,6 +213,7 @@ def _reverse_seq(input_seq, lengths): return result +@deprecated(None, 'Please consider `tf.nn.bidirectional_dynamic_rnn`.') def bidirectional_rnn(cell_fw, cell_bw, inputs, @@ -283,6 +294,7 @@ def bidirectional_rnn(cell_fw, # End of TensorFlow 0.7 +@deprecated(None, 'Please consider tensorflow/tensor2tensor.') def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional, target_predictor_fn, sequence_length, initial_state, attn_length, attn_size, attn_vec_size): diff --git a/tensorflow/contrib/learn/python/learn/monitored_session.py b/tensorflow/contrib/learn/python/learn/monitored_session.py index 22602e9f69..ac0433f177 100644 --- a/tensorflow/contrib/learn/python/learn/monitored_session.py +++ b/tensorflow/contrib/learn/python/learn/monitored_session.py @@ -13,7 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A wrapper of Session API which runs hooks.""" +"""A wrapper of Session API which runs hooks (deprecated). + +These are deprecated aliases for classes and functions in `tf.train`. Please use +those directly. 
+""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py index 9457a73ecf..77f7c73d54 100644 --- a/tensorflow/contrib/learn/python/learn/monitors.py +++ b/tensorflow/contrib/learn/python/learn/monitors.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Monitors instrument the training process. +"""Monitors instrument the training process (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. @@get_default_monitors @@BaseMonitor @@ -59,6 +63,10 @@ from tensorflow.python.util import tf_inspect class BaseMonitor(object): """Base class for Monitors. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Defines basic interfaces of Monitors. Monitors can either be run on all workers or, more commonly, restricted to run exclusively on the elected chief worker. @@ -229,6 +237,10 @@ def _extract_output(outputs, request): class EveryN(BaseMonitor): """Base class for monitors that execute callbacks every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This class adds three new callbacks: - every_n_step_begin - every_n_step_end @@ -418,6 +430,10 @@ class StopAtStep(BaseMonitor): class PrintTensor(EveryN): """Prints given tensors every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ This is an `EveryN` monitor and has consistent semantic for `every_n` and `first_n`. @@ -455,9 +471,12 @@ class PrintTensor(EveryN): class LoggingTrainable(EveryN): """Writes trainable variable values into log every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Write the tensors in trainable variables `every_n` steps, starting with the `first_n`th step. - """ def __init__(self, scope=None, every_n=100, first_n=1): @@ -493,7 +512,12 @@ class LoggingTrainable(EveryN): class SummarySaver(EveryN): - """Saves summaries every N steps.""" + """Saves summaries every N steps. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, summary_op, @@ -554,6 +578,10 @@ class SummarySaver(EveryN): class ValidationMonitor(EveryN): """Runs evaluation of a given estimator, at most every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note that the evaluation is done based on the saved checkpoint, which will usually be older than the current step. @@ -756,6 +784,10 @@ class ValidationMonitor(EveryN): class CaptureVariable(EveryN): """Captures a variable's values into a collection. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This monitor is useful for unit testing. You should exercise caution when using this monitor in production, since it never discards values. 
@@ -794,6 +826,7 @@ class CaptureVariable(EveryN): self._var_values[step] = _extract_output(outputs, self._var_name) +@deprecation.deprecated(None, "Use tf.train.MonitoredTrainingSession.") def get_default_monitors(loss_op=None, summary_op=None, save_summary_steps=100, @@ -828,6 +861,10 @@ def get_default_monitors(loss_op=None, class GraphDump(BaseMonitor): """Dumps almost all tensors in the graph at every step. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note, this is very expensive, prefer `PrintTensor` in production. """ @@ -917,7 +954,12 @@ class GraphDump(BaseMonitor): class ExportMonitor(EveryN): - """Monitor that exports Estimator every N steps.""" + """Monitor that exports Estimator every N steps. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ @deprecation.deprecated("2017-03-25", "ExportMonitor is deprecated. Please pass an " @@ -1040,7 +1082,12 @@ class ExportMonitor(EveryN): class CheckpointSaver(BaseMonitor): - """Saves checkpoints every N steps or N seconds.""" + """Saves checkpoints every N steps or N seconds. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, checkpoint_dir, @@ -1125,7 +1172,12 @@ class CheckpointSaver(BaseMonitor): class StepCounter(EveryN): - """Steps per second monitor.""" + """Steps per second monitor. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ def __init__(self, every_n_steps=100, output_dir=None, summary_writer=None): super(StepCounter, self).__init__(every_n_steps=every_n_steps) @@ -1165,6 +1217,10 @@ class NanLossDuringTrainingError(RuntimeError): class NanLoss(EveryN): """NaN Loss monitor. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Monitors loss and stops training if loss is NaN. Can either fail with exception or just stop training. """ diff --git a/tensorflow/contrib/learn/python/learn/ops/__init__.py b/tensorflow/contrib/learn/python/learn/ops/__init__.py index 33962e34cc..efb1f47cf5 100644 --- a/tensorflow/contrib/learn/python/learn/ops/__init__.py +++ b/tensorflow/contrib/learn/python/learn/ops/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Various TensorFlow Ops.""" +"""Various TensorFlow Ops (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py index fa3b7323e3..b3b067b8e1 100644 --- a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py @@ -13,7 +13,11 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops to work with embeddings. +"""TensorFlow Ops to work with embeddings (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
Note: categorical variables are handled via embeddings in many cases. For example, in case of words. diff --git a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py index b040ab3bb6..92976d1539 100644 --- a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops for loss computation.""" +"""TensorFlow Ops for loss computation (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py b/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py index 45727faab4..aa37cb4a76 100644 --- a/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops for Sequence to Sequence models.""" +"""TensorFlow Ops for Sequence to Sequence models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -26,8 +31,10 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): """Returns predictions and loss for sequence of predictions. @@ -57,6 +64,7 @@ def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): return array_ops.stack(predictions, axis=1), loss +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def seq2seq_inputs(x, y, input_length, output_length, sentinel=None, name=None): """Processes inputs for Sequence to Sequence models. @@ -87,6 +95,7 @@ def seq2seq_inputs(x, y, input_length, output_length, sentinel=None, name=None): return in_x, in_y, out_y +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def rnn_decoder(decoder_inputs, initial_state, cell, scope=None): """RNN Decoder that creates training and sampling sub-graphs. @@ -123,6 +132,7 @@ def rnn_decoder(decoder_inputs, initial_state, cell, scope=None): return outputs, states, sampling_outputs, sampling_states +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def rnn_seq2seq(encoder_inputs, decoder_inputs, encoder_cell, diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py b/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py index 7bcc177d4e..e8c6e1acf8 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. 
# ============================================================================== -"""Preprocessing tools useful for building models.""" +"""Preprocessing tools useful for building models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py b/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py index 154739d497..faba3b2025 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Implements preprocessing transformers for categorical variables.""" +"""Implements preprocessing transformers for categorical variables (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,8 @@ from __future__ import print_function import math import numpy as np +from tensorflow.python.util.deprecation import deprecated + # pylint: disable=g-bad-import-order from . import categorical_vocabulary from ..learn_io.data_feeder import setup_processor_data_feeder @@ -31,10 +38,16 @@ from ..learn_io.data_feeder import setup_processor_data_feeder class CategoricalProcessor(object): """Maps documents to sequences of word ids. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ As a common convention, Nan values are handled as unknown tokens. Both float('nan') and np.nan are accepted. """ + @deprecated(None, 'Please use tensorflow/transform or tf.data for sequence ' + 'processing.') def __init__(self, min_frequency=0, share=False, vocabularies=None): """Initializes a CategoricalProcessor instance. diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py b/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py index 5709955c49..3ac370a6ab 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py @@ -13,7 +13,11 @@ # limitations under the License. # ============================================================================== -"""Categorical vocabulary classes to map categories to indexes. +"""Categorical vocabulary classes to map categories to indexes (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Can be used for categorical variables, sparse variables and words. """ @@ -25,14 +29,21 @@ from __future__ import print_function import collections import six +from tensorflow.python.util.deprecation import deprecated + class CategoricalVocabulary(object): """Categorical variables vocabulary class. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Accumulates and provides mapping from classes to indexes. Can be easily used for words. 
""" + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, unknown_token="", support_reverse=True): self._unknown_token = unknown_token self._mapping = {unknown_token: 0} diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/text.py b/tensorflow/contrib/learn/python/learn/preprocessing/text.py index 3af2074c2a..f2b6776be7 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/text.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/text.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Implements a number of text preprocessing utilities.""" +"""Implements a number of text preprocessing utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -24,6 +29,7 @@ import numpy as np import six from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated from .categorical_vocabulary import CategoricalVocabulary # pylint: disable=g-bad-import-order @@ -38,6 +44,7 @@ TOKENIZER_RE = re.compile(r"[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+", re.UNICODE) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def tokenizer(iterator): """Tokenizer generator. @@ -51,9 +58,16 @@ def tokenizer(iterator): yield TOKENIZER_RE.findall(value) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') class ByteProcessor(object): - """Maps documents into sequence of ids for bytes.""" + """Maps documents into sequence of ids for bytes. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, max_document_length): self.max_document_length = max_document_length @@ -108,8 +122,14 @@ class ByteProcessor(object): class VocabularyProcessor(object): - """Maps documents to sequences of word ids.""" + """Maps documents to sequences of word ids. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, max_document_length, min_frequency=0, diff --git a/tensorflow/contrib/learn/python/learn/session_run_hook.py b/tensorflow/contrib/learn/python/learn/session_run_hook.py index a8ba2be972..87edc9b720 100644 --- a/tensorflow/contrib/learn/python/learn/session_run_hook.py +++ b/tensorflow/contrib/learn/python/learn/session_run_hook.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""This file is deprecated. Use tensorflow.python.training.session_run_hook.""" +"""This file is deprecated. Use `tensorflow.python.training.session_run_hook`. + +See [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/summary_writer_cache.py b/tensorflow/contrib/learn/python/learn/summary_writer_cache.py index 919d415c30..d663cf5fb7 100644 --- a/tensorflow/contrib/learn/python/learn/summary_writer_cache.py +++ b/tensorflow/contrib/learn/python/learn/summary_writer_cache.py @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Wrapper for a Session-like object that handles threads and recovery. +"""Wrapper for a Session-like object that handles threads and recovery (deprecated). + +These are deprecated aliases for classes and functions in `tf.train`. Please use +those directly. Based on an original design of Illia Polosukhin. """ diff --git a/tensorflow/contrib/learn/python/learn/trainable.py b/tensorflow/contrib/learn/python/learn/trainable.py index 429b6040be..a1a3f20dcd 100644 --- a/tensorflow/contrib/learn/python/learn/trainable.py +++ b/tensorflow/contrib/learn/python/learn/trainable.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""`Trainable` interface.""" +"""`Trainable` interface (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -23,6 +28,8 @@ import abc class Trainable(object): """Interface for objects that are trainable by, e.g., `Experiment`. + + THIS CLASS IS DEPRECATED. """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/utils/__init__.py b/tensorflow/contrib/learn/python/learn/utils/__init__.py index 48978d0ac3..66d8dc6fd4 100644 --- a/tensorflow/contrib/learn/python/learn/utils/__init__.py +++ b/tensorflow/contrib/learn/python/learn/utils/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Learn Utils.""" +"""TensorFlow Learn Utils (deprecated). + +This module and all its submodules are deprecated. 
See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py index cb34cb1d26..3eacac7a3d 100644 --- a/tensorflow/contrib/learn/python/learn/utils/export.py +++ b/tensorflow/contrib/learn/python/learn/utils/export.py @@ -13,14 +13,18 @@ # limitations under the License. # ============================================================================== -"""Export utilities.""" +"""Export utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.framework import deprecated -from tensorflow.python.training import training_util from tensorflow.contrib.session_bundle import exporter from tensorflow.contrib.session_bundle import gc from tensorflow.python.client import session as tf_session @@ -32,6 +36,7 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import saver as tf_saver +from tensorflow.python.training import training_util @deprecated('2017-03-25', 'Please use Estimator.export_savedmodel() instead.') diff --git a/tensorflow/contrib/learn/python/learn/utils/gc.py b/tensorflow/contrib/learn/python/learn/utils/gc.py index 226915987a..916aecbea8 100644 --- a/tensorflow/contrib/learn/python/learn/utils/gc.py +++ b/tensorflow/contrib/learn/python/learn/utils/gc.py @@ -13,7 +13,11 @@ # limitations under the License. 
# ============================================================================== -r"""System for specifying garbage collection (GC) of path based data. +r"""System for specifying garbage collection (GC) of path based data (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This framework allows for GC of data specified by path names, for example files on disk. gc.Path objects each represent a single item stored at a path and may @@ -73,10 +77,12 @@ import os from tensorflow.python.platform import gfile from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated Path = collections.namedtuple('Path', 'path export_version') +@deprecated(None, 'Please implement your own file management or use Saver.') def largest_export_versions(n): """Creates a filter that keeps the largest n export versions. @@ -97,6 +103,7 @@ def largest_export_versions(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def one_of_every_n_export_versions(n): """Creates a filter that keeps one of every n export versions. @@ -128,6 +135,7 @@ def one_of_every_n_export_versions(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def mod_export_version(n): """Creates a filter that keeps every export that is a multiple of n. @@ -146,6 +154,7 @@ def mod_export_version(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def union(lf, rf): """Creates a filter that keeps the union of two filters. @@ -163,6 +172,7 @@ def union(lf, rf): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def negation(f): """Negate a filter. 
@@ -179,6 +189,7 @@ def negation(f): return keep +@deprecated(None, 'Please implement your own file name management.') def get_paths(base_dir, parser): """Gets a list of Paths in a given directory. diff --git a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py index b2521933e5..b92eb9fea8 100644 --- a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities for creating input_fns. +"""Utilities for creating input_fns (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Contents of this file are moved to tensorflow/python/estimator/export.py. InputFnOps is renamed to ServingInputReceiver. @@ -32,13 +36,17 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import parsing_ops +from tensorflow.python.util.deprecation import deprecated class InputFnOps(collections.namedtuple('InputFnOps', ['features', 'labels', 'default_inputs'])): - """A return type for an input_fn. + """A return type for an input_fn (deprecated). + + THIS CLASS IS DEPRECATED. Please use tf.estimator.export.ServingInputReceiver + instead. This return type is currently only supported for serving input_fn. Training and eval input_fn should return a `(features, labels)` tuple. 
@@ -56,6 +64,8 @@ class InputFnOps(collections.namedtuple('InputFnOps', """ +@deprecated(None, 'Please use ' + 'tf.estimator.export.build_parsing_serving_input_receiver_fn.') def build_parsing_serving_input_fn(feature_spec, default_batch_size=None): """Build an input_fn appropriate for serving, expecting fed tf.Examples. @@ -84,6 +94,8 @@ def build_parsing_serving_input_fn(feature_spec, default_batch_size=None): return input_fn +@deprecated(None, 'Please use ' + 'tf.estimator.export.build_raw_serving_input_receiver_fn.') def build_default_serving_input_fn(features, default_batch_size=None): """Build an input_fn appropriate for serving, expecting feature Tensors. diff --git a/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py b/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py index 6a63fb545a..6dbaa15f83 100644 --- a/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py +++ b/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A simple script for inspect checkpoint files.""" +"""A simple script for inspect checkpoint files (deprecated).""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index 1593380007..213619a187 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities supporting export to SavedModel. 
+"""Utilities supporting export to SavedModel (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Some contents of this file are moved to tensorflow/python/estimator/export.py: @@ -52,8 +56,9 @@ from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils from tensorflow.python.summary import summary_iterator from tensorflow.python.training import saver - from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated + # A key for use in the input_alternatives dict indicating the default input. # This is the input that will be expected when a serving request does not @@ -77,6 +82,7 @@ FEATURES_INPUT_ALTERNATIVE_KEY = 'features_input_alternative' _FALLBACK_DEFAULT_OUTPUT_ALTERNATIVE_KEY = 'default_output_alternative' +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def build_standardized_signature_def(input_tensors, output_tensors, problem_type): """Build a SignatureDef using problem type and input and output Tensors. @@ -156,6 +162,7 @@ def _is_regression_problem(problem_type, input_tensors, output_tensors): len(input_tensors) == 1 and len(output_tensors) == 1) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_input_alternatives(input_ops): """Obtain all input alternatives using the input_fn output and heuristics.""" input_alternatives = {} @@ -181,6 +188,7 @@ def get_input_alternatives(input_ops): return input_alternatives, features +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_output_alternatives(model_fn_ops, default_output_alternative_key=None): """Obtain all output alternatives using the model_fn output and heuristics. 
@@ -246,6 +254,7 @@ def get_output_alternatives(model_fn_ops, default_output_alternative_key=None): sorted(output_alternatives.keys()))) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def build_all_signature_defs(input_alternatives, output_alternatives, actual_default_output_alternative_key): """Build `SignatureDef`s from all pairs of input and output alternatives.""" @@ -279,6 +288,7 @@ def build_all_signature_defs(input_alternatives, output_alternatives, MAX_DIRECTORY_CREATION_ATTEMPTS = 10 +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_timestamped_export_dir(export_dir_base): """Builds a path to a new subdirectory within the base directory. @@ -317,6 +327,7 @@ def get_timestamped_export_dir(export_dir_base): '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS)) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_temp_export_dir(timestamped_export_dir): """Builds a directory name based on the argument but starting with 'temp-'. @@ -344,6 +355,7 @@ def _export_version_parser(path): return path._replace(export_version=int(filename)) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_most_recent_export(export_dir_base): """Locate the most recent SavedModel export in a directory of many exports. @@ -363,6 +375,7 @@ def get_most_recent_export(export_dir_base): return next(iter(results or []), None) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def garbage_collect_exports(export_dir_base, exports_to_keep): """Deletes older exports, retaining only a given number of the most recent. 
@@ -387,6 +400,7 @@ def garbage_collect_exports(export_dir_base, exports_to_keep): logging.warn('Can not delete %s recursively: %s', p.path, e) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def make_export_strategy(serving_input_fn, default_output_alternative_key=None, assets_extra=None, @@ -469,6 +483,8 @@ def make_export_strategy(serving_input_fn, return export_strategy.ExportStrategy('Servo', export_fn, strip_default_attrs) +@deprecated(None, + 'Use tf.estimator.export.build_parsing_serving_input_receiver_fn') def make_parsing_export_strategy(feature_columns, default_output_alternative_key=None, assets_extra=None, @@ -555,8 +571,14 @@ def _default_compare_fn(curr_best_eval_result, cand_eval_result): class BestModelSelector(object): - """A helper that keeps track of export selection candidates.""" + """A helper that keeps track of export selection candidates. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def __init__(self, event_file_pattern=None, compare_fn=None): """Constructor of this class. @@ -622,6 +644,7 @@ class BestModelSelector(object): return best_eval_result +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def make_best_model_export_strategy( serving_input_fn, exports_to_keep=1, @@ -707,6 +730,7 @@ def make_best_model_export_strategy( # TODO(b/67013778): Revisit this approach when corresponding changes to # TF Core are finalized. 
+@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def extend_export_strategy(base_export_strategy, post_export_fn, post_export_name=None): diff --git a/tensorflow/python/util/decorator_utils.py b/tensorflow/python/util/decorator_utils.py index df259c7f7c..7b4363c0e4 100644 --- a/tensorflow/python/util/decorator_utils.py +++ b/tensorflow/python/util/decorator_utils.py @@ -82,7 +82,7 @@ def add_notice_to_docstring( lines = _normalize_docstring(doc).splitlines() lines[0] += ' ' + suffix_str - notice = [''] + notice + [instructions] + notice = [''] + notice + ([instructions] if instructions else []) if len(lines) > 1: # Make sure that we keep our distance from the main body -- GitLab From 29bc0d92967d8853c872ba7f736462f1ea2fbd81 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 16:24:54 -0800 Subject: [PATCH 037/311] [XLA] In HloEvaluator, fix an issue for HandleAbs to handle complex numbers more correctly: - abs([complex numbers]) would yield floats. However since the specilization for HandleAbs is based on the return type (float), we'd CHECK fail due to float != complex when accessing the elements of the operand (complex). - enable unary_op_test for interpreter. 
PiperOrigin-RevId: 187099576 --- .../compiler/xla/service/hlo_evaluator.cc | 32 +++++++++++++++++-- tensorflow/compiler/xla/tests/BUILD | 1 + 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index fd06b19144..cf8b35908f 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -57,6 +57,12 @@ struct is_complex_t : public std::false_type {}; template <> struct is_complex_t : public std::true_type {}; +template +struct is_complex64_t : public std::false_type {}; + +template <> +struct is_complex64_t : public std::true_type {}; + template StatusOr> Compare(const Shape& shape, HloOpcode opcode, const Literal& lhs_literal, @@ -248,17 +254,37 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { template < typename NativeT, - typename std::enable_if::value || - is_complex_t::value>::type* = nullptr> + typename std::enable_if::value>::type* = nullptr> Status HandleAbs(HloInstruction* abs) { TF_ASSIGN_OR_RETURN(parent_->evaluated_[abs], - ElementWiseUnaryOp(abs, [](ElementwiseT elem_operand) { + ElementWiseUnaryOp(abs, [](NativeT elem_operand) { return std::abs(elem_operand); })); return Status::OK(); } + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleAbs(HloInstruction* abs) { + const Literal& operand_literal = + parent_->GetEvaluatedLiteralFor(abs->operand(0)); + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[abs], + (ElementWiseUnaryOpImpl( + abs, [](NativeT elem_operand) { return std::abs(elem_operand); }, + operand_literal))); + + return Status::OK(); + } + Status HandleAbs(HloInstruction* abs) override { + // If the operand is of C64 type, the return type of abs will be F32. + // However, ElementwiseT would still be the return type, F32, and thus + // specifying the ElementwiseT explicitly as C64 is needed below. 
+ if (abs->operand(0)->shape().element_type() == C64) { + return HandleAbs(abs); + } return HandleAbs(abs); } diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 33fde9737d..f3ecfc1604 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -494,6 +494,7 @@ xla_test( xla_test( name = "unary_op_test", srcs = ["unary_op_test.cc"], + tags = ["enable_for_xla_interpreter"], deps = [ "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", -- GitLab From e37a7ae2277a2a2f7b50ad5ef361e41c30edeb41 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Feb 2018 17:01:24 -0800 Subject: [PATCH 038/311] Only link the swapping code when compiling TensorFlow with CUDA support. PiperOrigin-RevId: 187104273 --- tensorflow/core/grappler/optimizers/BUILD | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 908e58bcc7..a52d1c8df2 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -3,6 +3,7 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") filegroup( name = "all_files", @@ -319,8 +320,6 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":gpu_swapping_kernels", - ":gpu_swapping_ops", ":graph_optimizer", ":graph_rewriter", ":static_schedule", @@ -336,7 +335,10 @@ cc_library( "//tensorflow/core/grappler/costs:graph_properties", "//tensorflow/core/grappler/utils:topological_sort", "//tensorflow/core/grappler/utils:traversal", - ], + ] + if_cuda([ + ":gpu_swapping_kernels", + ":gpu_swapping_ops", + ]), ) tf_cc_test_gpu( -- GitLab From 49d4e9233cebdff001ffcc2e3d703e815ba0a881 Mon 
Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 17:04:09 -0800 Subject: [PATCH 039/311] Consolidate the builtin function overrides into a single module, and use a generic `dynamic_builtin` function to dispatch between implementations. Use the generic dispatcher in the generated code. PiperOrigin-RevId: 187104685 --- .../py2tf/converters/builtin_functions.py | 13 ++++--- tensorflow/contrib/py2tf/utils/BUILD | 12 +----- tensorflow/contrib/py2tf/utils/__init__.py | 4 +- .../py2tf/utils/{printing.py => builtins.py} | 32 +++++++++++++-- .../{printing_test.py => builtins_test.py} | 39 +++++++++++++++---- tensorflow/contrib/py2tf/utils/misc.py | 13 ------- tensorflow/contrib/py2tf/utils/misc_test.py | 27 +------------ 7 files changed, 72 insertions(+), 68 deletions(-) rename tensorflow/contrib/py2tf/utils/{printing.py => builtins.py} (62%) rename tensorflow/contrib/py2tf/utils/{printing_test.py => builtins_test.py} (56%) diff --git a/tensorflow/contrib/py2tf/converters/builtin_functions.py b/tensorflow/contrib/py2tf/converters/builtin_functions.py index e69038aced..b5aa9756da 100644 --- a/tensorflow/contrib/py2tf/converters/builtin_functions.py +++ b/tensorflow/contrib/py2tf/converters/builtin_functions.py @@ -36,23 +36,24 @@ class BuiltinFunctionTransformer(transformer.Base): # pylint:disable=invalid-name - def _convert_len(self, node): + def _convert_builtin(self, node): template = """ - py2tf_utils.dynamic_len(args) + py2tf_utils.dynamic_builtin(func, args) """ - return templates.replace(template, args=node.args)[0].value + return templates.replace(template, func=node.func, args=node.args)[0].value def _convert_print(self, node): template = """ - py2tf_utils.call_print(args) + py2tf_utils.dynamic_print(args) """ return templates.replace(template, args=node.args)[0].value def visit_Call(self, node): self.generic_visit(node) # TODO(mdan): This won't work if the function was hidden. 
- if isinstance(node.func, gast.Name) and node.func.id == 'len': - return self._convert_len(node) + if isinstance(node.func, gast.Name) and node.func.id in ('len',): + return self._convert_builtin(node) + # Print needs to be handled separately because it can be read as statement. if isinstance(node.func, gast.Name) and node.func.id == 'print': return self._convert_print(node) return node diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index c2fdd40707..2086a9ef60 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -20,10 +20,10 @@ py_library( name = "utils", srcs = [ "__init__.py", + "builtins.py", "context_managers.py", "misc.py", "multiple_dispatch.py", - "printing.py", "py_func.py", "tensor_list.py", "type_check.py", @@ -76,16 +76,6 @@ py_test( ], ) -py_test( - name = "printing_test", - srcs = ["printing_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":utils", - "//tensorflow/python:client_testlib", - ], -) - py_test( name = "type_check_test", srcs = ["type_check_test.py"], diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index d931322bf3..19bf2272bc 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -18,11 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.py2tf.utils.builtins import dynamic_builtin +from tensorflow.contrib.py2tf.utils.builtins import dynamic_print from tensorflow.contrib.py2tf.utils.context_managers import control_dependency_on_returns from tensorflow.contrib.py2tf.utils.misc import alias_tensors -from tensorflow.contrib.py2tf.utils.misc import dynamic_len from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_cond from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while -from tensorflow.contrib.py2tf.utils.printing import call_print 
from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func from tensorflow.contrib.py2tf.utils.type_check import is_tensor diff --git a/tensorflow/contrib/py2tf/utils/printing.py b/tensorflow/contrib/py2tf/utils/builtins.py similarity index 62% rename from tensorflow/contrib/py2tf/utils/printing.py rename to tensorflow/contrib/py2tf/utils/builtins.py index 95a62bd80b..0a50b80b60 100644 --- a/tensorflow/contrib/py2tf/utils/printing.py +++ b/tensorflow/contrib/py2tf/utils/builtins.py @@ -12,14 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TensorFlow printing support utilities.""" +"""Builtin conversion utilities.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.py2tf.utils import py_func +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import logging_ops +from tensorflow.python.util import tf_inspect + + +def dynamic_builtin(f, *args, **kwargs): + """Converts a builtin function call inline.""" + if not tf_inspect.isbuiltin(f): + return f(*args, **kwargs) + + if f is len: + return dynamic_len(*args, **kwargs) + + raise NotImplementedError('The "%s" builtin is not yet supported.' % f) + + +def dynamic_len(list_or_tensor): + """Implementation of len using dynamic dispatch.""" + if tensor_util.is_tensor(list_or_tensor): + shape = list_or_tensor.shape + if not shape: + raise ValueError( + 'len requires non-zero rank for tensor "%s"' % list_or_tensor) + return array_ops.shape(list_or_tensor)[0] + + return len(list_or_tensor) def is_tf_print_compatible(value): @@ -30,8 +56,8 @@ def is_tf_print_compatible(value): return False -def call_print(*values): - """Compiled counterpart of the print builtin. 
+def dynamic_print(*values): + """Implementartion of print using dynamic dispatch. The function attempts to use tf.Print if all the values are compatible. Otherwise, it will fall back to py_func. diff --git a/tensorflow/contrib/py2tf/utils/printing_test.py b/tensorflow/contrib/py2tf/utils/builtins_test.py similarity index 56% rename from tensorflow/contrib/py2tf/utils/printing_test.py rename to tensorflow/contrib/py2tf/utils/builtins_test.py index 2070deb304..19a72c63ec 100644 --- a/tensorflow/contrib/py2tf/utils/printing_test.py +++ b/tensorflow/contrib/py2tf/utils/builtins_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for printing module.""" +"""Tests for builtins module.""" from __future__ import absolute_import from __future__ import division @@ -22,28 +22,53 @@ import sys import six -from tensorflow.contrib.py2tf.utils import printing +from tensorflow.contrib.py2tf.utils import builtins +from tensorflow.python.framework import constant_op from tensorflow.python.platform import test -class ContextManagersTest(test.TestCase): +class BuiltinsTest(test.TestCase): - def test_call_print_tf(self): + def test_dynamic_len_tf_scalar(self): + a = constant_op.constant(1) + + with self.assertRaises(ValueError): + with self.test_session() as sess: + sess.run(builtins.dynamic_builtin(len, a)) + + def test_dynamic_len_tf_array(self): + a = constant_op.constant([1, 2, 3]) + + with self.test_session() as sess: + self.assertEqual(3, sess.run(builtins.dynamic_builtin(len, a))) + + def test_dynamic_len_tf_matrix(self): + a = constant_op.constant([[1, 2], [3, 4]]) + + with self.test_session() as sess: + self.assertEqual(2, sess.run(builtins.dynamic_builtin(len, a))) + + def test_dynamic_len_py_list(self): + a = [3] * 5 + + self.assertEqual(5, builtins.dynamic_builtin(len, a)) + + def 
test_dynamic_print_tf(self): try: out_capturer = six.StringIO() sys.stdout = out_capturer with self.test_session() as sess: - sess.run(printing.call_print('test message', 1)) + sess.run(builtins.dynamic_print('test message', 1)) self.assertEqual(out_capturer.getvalue(), 'test message 1\n') finally: sys.stdout = sys.__stdout__ - def test_call_print_py_func(self): + def test_dynamic_print_complex(self): try: out_capturer = six.StringIO() sys.stdout = out_capturer with self.test_session() as sess: - sess.run(printing.call_print('test message', [1, 2])) + sess.run(builtins.dynamic_print('test message', [1, 2])) self.assertEqual(out_capturer.getvalue(), 'test message [1, 2]\n') finally: sys.stdout = sys.__stdout__ diff --git a/tensorflow/contrib/py2tf/utils/misc.py b/tensorflow/contrib/py2tf/utils/misc.py index 7548048388..1b06caf0bd 100644 --- a/tensorflow/contrib/py2tf/utils/misc.py +++ b/tensorflow/contrib/py2tf/utils/misc.py @@ -19,22 +19,9 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops -def dynamic_len(list_or_tensor): - """Implementation of len using dynamic dispatch.""" - if tensor_util.is_tensor(list_or_tensor): - shape = list_or_tensor.shape - if not shape: - raise ValueError( - 'len requires non-zero rank for tensor "%s"' % list_or_tensor) - return array_ops.shape(list_or_tensor)[0] - - return len(list_or_tensor) - - def alias_tensors(*args): """Wrap any Tensor arguments with an identity op. 
diff --git a/tensorflow/contrib/py2tf/utils/misc_test.py b/tensorflow/contrib/py2tf/utils/misc_test.py index ec88e7cb74..8aedd4cd64 100644 --- a/tensorflow/contrib/py2tf/utils/misc_test.py +++ b/tensorflow/contrib/py2tf/utils/misc_test.py @@ -19,37 +19,12 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.py2tf.utils.misc import alias_tensors -from tensorflow.contrib.py2tf.utils.misc import dynamic_len from tensorflow.python.framework.constant_op import constant from tensorflow.python.ops.variables import Variable from tensorflow.python.platform import test -class ContextManagersTest(test.TestCase): - - def test_dynamic_len_tf_scalar(self): - a = constant(1) - - with self.assertRaises(ValueError): - with self.test_session() as sess: - sess.run(dynamic_len(a)) - - def test_dynamic_len_tf_array(self): - a = constant([1, 2, 3]) - - with self.test_session() as sess: - self.assertEqual(3, sess.run(dynamic_len(a))) - - def test_dynamic_len_tf_matrix(self): - a = constant([[1, 2], [3, 4]]) - - with self.test_session() as sess: - self.assertEqual(2, sess.run(dynamic_len(a))) - - def test_dynamic_len_py_list(self): - a = [3] * 5 - - self.assertEqual(5, dynamic_len(a)) +class MiscTest(test.TestCase): def test_alias_single_tensor(self): a = constant(1) -- GitLab From c7c8f4e82ede4fec5b21f9acd61bcc221d87efdc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 17:27:20 -0800 Subject: [PATCH 040/311] Fix buffer assignment for conditional instruction. 
PiperOrigin-RevId: 187107432 --- .../compiler/xla/service/buffer_assignment.cc | 358 +++++++++--------- .../compiler/xla/service/copy_insertion.cc | 72 +++- 2 files changed, 241 insertions(+), 189 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index b1e693da9d..d44d3d71d9 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -48,6 +48,183 @@ using ::tensorflow::strings::HumanReadableNumBytes; using ::tensorflow::strings::Printf; using ::tensorflow::strings::StrAppend; +namespace { + +template +string ColocatedBufferSetsToString(const T& container, const char* title) { + string result; + StrAppend(&result, title, "\n"); + for (const auto& it : container) { + StrAppend(&result, "\t", it->ToString(), "\n"); + } + return result; +} + +// Walk the call graph of the HLO module and place each computation into either +// thread_local_computations or global_computations depending upon whether the +// computation requires thread-local allocations or global allocations. The +// elements in thread_local_computations and global_computations are in post +// order (if computation A has an instruction which calls computation B, then A +// will appear after B in the vector). +Status GatherComputationsByAllocationType( + const HloModule* module, + std::vector* thread_local_computations, + std::vector* global_computations) { + // Create a worklist of computations paired with whether the allocation must + // be thread-local. + std::deque> worklist; + worklist.push_back(std::make_pair(module->entry_computation(), + /*is_thread_local*/ false)); + + // Sets for quickly checking membership. Computations are returned in vectors + // for stable iteration. 
+ FlatSet thread_local_set; + FlatSet global_set; + + while (!worklist.empty()) { + auto worklist_front = worklist.front(); + worklist.pop_front(); + const HloComputation* computation = worklist_front.first; + bool is_thread_local = worklist_front.second; + bool in_thread_local_set = thread_local_set.count(computation) > 0; + bool in_global_set = global_set.count(computation) > 0; + + // If the computation has already been added to the respective set, then + // nothing to do. + if ((is_thread_local && in_thread_local_set) || + (!is_thread_local && in_global_set)) { + continue; + } + + // If the computation has already been added to the other set this is an + // error condition because the global call to the computation (eg, + // while/call) may return a reference to one of the thread-local buffers to + // the calling computation which will become a dangling reference when the + // thread-local is deallocated with the call return. + if ((is_thread_local && in_global_set) || + (!is_thread_local && in_thread_local_set)) { + return InvalidArgument( + "computation %s has conflicting allocation requirements (global " + "and thread-local)", + computation->name().c_str()); + } + + if (is_thread_local) { + thread_local_set.insert(computation); + } else { + global_set.insert(computation); + } + + for (auto* instruction : computation->instructions()) { + for (HloComputation* subcomputation : + instruction->called_computations()) { + switch (instruction->opcode()) { + case HloOpcode::kCall: + case HloOpcode::kConditional: + case HloOpcode::kWhile: + // Call and while must be called from a computation with global + // allocations as they may return references to buffers inside the + // called computation which cannot be thread-local. 
+ if (is_thread_local) { + return InvalidArgument( + "computation %s cannot contain call/while op because it " + "requires thread-local buffer allocations", + computation->name().c_str()); + } + worklist.push_back(std::make_pair(subcomputation, + false)); // Not thread local. + break; + case HloOpcode::kMap: + case HloOpcode::kReduce: + case HloOpcode::kReduceWindow: + case HloOpcode::kSelectAndScatter: + case HloOpcode::kFusion: + // Map/reduce etc computations are always thread-local. + worklist.push_back(std::make_pair(subcomputation, + true)); // Thread local. + break; + default: + return InternalError( + "Unexpected calling opcode: %s", + HloOpcodeString(instruction->opcode()).c_str()); + } + } + } + } + + // Add the computations to the vectors in post order. + for (auto* computation : module->MakeComputationPostOrder()) { + if (thread_local_set.count(computation) > 0) { + thread_local_computations->push_back(computation); + } else if (global_set.count(computation) > 0) { + global_computations->push_back(computation); + } + // If the computation is not reachable from the entry computation, then it + // will not appear in either thread_local_set or global_set. We don't bother + // assigning buffers for these. + } + return Status::OK(); +} + +// Checks that points-to set of 'instruction' is unambiguous and distinct +// (ensured by CopyInsertion), then adds the buffer from the points-to set at +// 'index' to 'colocated_set'. +const LogicalBuffer* AddBufferToColocatedSet( + const HloInstruction* instruction, const ShapeIndex& index, + const TuplePointsToAnalysis& points_to_analysis, + std::vector* colocated_set) { + // CopyInsertion ensures root points-to set is unambiguous and distinct. 
+ const auto& points_to = points_to_analysis.GetPointsToSet(instruction); + DCHECK(!points_to.IsAmbiguous()); + colocated_set->push_back(points_to.element(index)[0]); + return colocated_set->back(); +} + +// Given the interference map of a graph (the list of interfering node indices +// for each node), perform graph coloring such that interfering nodes are +// assigned to different colors. Returns the assigned color of the nodes, where +// the colors are represented as integer values [0, color_count). +std::vector ColorInterferenceGraph( + const std::vector>& interference_map) { + const int64 node_count = interference_map.size(); + + // Sort the nodes such that we assign nodes with more interference first. This + // relies on the common heuristic of assigning the most constrained node + // first, but it would be good to investigate other ordering heuristics too. + std::vector nodes(node_count); + std::iota(nodes.begin(), nodes.end(), 0); + std::sort(nodes.begin(), nodes.end(), + [&interference_map](const int64 i, const int64 j) { + return interference_map[i].size() > interference_map[j].size(); + }); + + const int64 kColorUnassigned = -1; + std::vector assigned_colors(node_count, kColorUnassigned); + for (int64 node : nodes) { + // Mark the colors that are already assigned to the neighbors. + std::vector available_colors(node_count, true); + for (int64 neighbor : interference_map[node]) { + int64 color = assigned_colors[neighbor]; + if (color != kColorUnassigned) { + available_colors[color] = false; + } + } + + // Find the color that is not yet assigned to the neighbors. 
+ int64 color = kColorUnassigned; + for (color = 0; color < available_colors.size(); ++color) { + if (available_colors[color]) { + break; + } + } + CHECK_NE(color, kColorUnassigned); + assigned_colors[node] = color; + } + return assigned_colors; +} + +} // namespace + size_t BufferAllocation::Slice::Hasher::operator()(Slice s) const { uint64 h = std::hash()(s.index()); h = tensorflow::Hash64Combine(h, std::hash()(s.offset())); @@ -523,116 +700,6 @@ BufferAssignmentProto BufferAssignment::ToProto() const { return proto; } -namespace { - -// Walk the call graph of the HLO module and place each computation into either -// thread_local_computations or global_computations depending upon whether the -// computation requires thread-local allocations or global allocations. The -// elements in thread_local_computations and global_computations are in post -// order (if computation A has an instruction which calls computation B, then A -// will appear after B in the vector). -Status GatherComputationsByAllocationType( - const HloModule* module, - std::vector* thread_local_computations, - std::vector* global_computations) { - // Create a worklist of computations paired with whether the allocation must - // be thread-local. - std::deque> worklist; - worklist.push_back(std::make_pair(module->entry_computation(), - /*is_thread_local*/ false)); - - // Sets for quickly checking membership. Computations are returned in vectors - // for stable iteration. - FlatSet thread_local_set; - FlatSet global_set; - - while (!worklist.empty()) { - auto worklist_front = worklist.front(); - worklist.pop_front(); - const HloComputation* computation = worklist_front.first; - bool is_thread_local = worklist_front.second; - bool in_thread_local_set = thread_local_set.count(computation) > 0; - bool in_global_set = global_set.count(computation) > 0; - - // If the computation has already been added to the respective set, then - // nothing to do. 
- if ((is_thread_local && in_thread_local_set) || - (!is_thread_local && in_global_set)) { - continue; - } - - // If the computation has already been added to the other set this is an - // error condition because the global call to the computation (eg, - // while/call) may return a reference to one of the thread-local buffers to - // the calling computation which will become a dangling reference when the - // thread-local is deallocated with the call return. - if ((is_thread_local && in_global_set) || - (!is_thread_local && in_thread_local_set)) { - return InvalidArgument( - "computation %s has conflicting allocation requirements (global " - "and thread-local)", - computation->name().c_str()); - } - - if (is_thread_local) { - thread_local_set.insert(computation); - } else { - global_set.insert(computation); - } - - for (auto* instruction : computation->instructions()) { - for (HloComputation* subcomputation : - instruction->called_computations()) { - switch (instruction->opcode()) { - case HloOpcode::kCall: - case HloOpcode::kConditional: - case HloOpcode::kWhile: - // Call and while must be called from a computation with global - // allocations as they may return references to buffers inside the - // called computation which cannot be thread-local. - if (is_thread_local) { - return InvalidArgument( - "computation %s cannot contain call/while op because it " - "requires thread-local buffer allocations", - computation->name().c_str()); - } - worklist.push_back(std::make_pair(subcomputation, - false)); // Not thread local. - break; - case HloOpcode::kMap: - case HloOpcode::kReduce: - case HloOpcode::kReduceWindow: - case HloOpcode::kSelectAndScatter: - case HloOpcode::kFusion: - // Map/reduce etc computations are always thread-local. - worklist.push_back(std::make_pair(subcomputation, - true)); // Thread local. 
- break; - default: - return InternalError( - "Unexpected calling opcode: %s", - HloOpcodeString(instruction->opcode()).c_str()); - } - } - } - } - - // Add the computations to the vectors in post order. - for (auto* computation : module->MakeComputationPostOrder()) { - if (thread_local_set.count(computation) > 0) { - thread_local_computations->push_back(computation); - } else if (global_set.count(computation) > 0) { - global_computations->push_back(computation); - } - // If the computation is not reachable from the entry computation, then it - // will not appear in either thread_local_set or global_set. We don't bother - // assigning buffers for these. - } - return Status::OK(); -} - -} // namespace - /* static */ StatusOr> BufferAssigner::Run( const HloModule* module, std::unique_ptr hlo_ordering, @@ -1085,7 +1152,8 @@ void BufferAssigner::AddSetToColocatedBufferSets( if (colocated_set.empty()) { return; } - + VLOG(5) << ColocatedBufferSetsToString(colocated_set, + "Adding colocated buffer set"); // Find existing sets that overlap with at least one buffer from the // colocated_set. The resulting 'overlap_set_indices' will have at most // colocated_buffer_sets->size() entries, and will be in increasing order. 
@@ -1093,6 +1161,10 @@ void BufferAssigner::AddSetToColocatedBufferSets( for (size_t index = 0; index < colocated_buffer_sets->size(); ++index) { for (const LogicalBuffer* buffer : colocated_set) { if ((*colocated_buffer_sets)[index].count(buffer) > 0) { + VLOG(5) << "Found overlap with existing set on buffer " + << buffer->ToString() << "\n" + << ColocatedBufferSetsToString((*colocated_buffer_sets)[index], + "Overlapping set"); overlap_set_indices.push_back(index); break; } @@ -1104,6 +1176,7 @@ void BufferAssigner::AddSetToColocatedBufferSets( colocated_buffer_sets->emplace_back(); colocated_buffer_sets->back().insert(colocated_set.begin(), colocated_set.end()); + VLOG(5) << "No overlap found, new group created"; return; } @@ -1115,6 +1188,8 @@ void BufferAssigner::AddSetToColocatedBufferSets( first->insert(overlap_set.begin(), overlap_set.end()); } first->insert(colocated_set.begin(), colocated_set.end()); + VLOG(5) << ColocatedBufferSetsToString( + *first, "Result of the colocated buffer set merging"); // Remove overlap sets that we just merged. The offset accounts for the fact // that as elements are erased, the indices need to be adjusted. Keep in mind @@ -1125,67 +1200,6 @@ void BufferAssigner::AddSetToColocatedBufferSets( } } -namespace { - -// Checks that points-to set of 'instruction' is unambiguous and distinct -// (ensured by CopyInsertion), then adds the buffer from the points-to set at -// 'index' to 'colocated_set'. -const LogicalBuffer* AddBufferToColocatedSet( - const HloInstruction* instruction, const ShapeIndex& index, - const TuplePointsToAnalysis& points_to_analysis, - std::vector* colocated_set) { - // CopyInsertion ensures root points-to set is unambiguous and distinct. 
- const auto& points_to = points_to_analysis.GetPointsToSet(instruction); - DCHECK(!points_to.IsAmbiguous()); - colocated_set->push_back(points_to.element(index)[0]); - return colocated_set->back(); -} - -// Given the interference map of a graph (the list of interfering node indices -// for each node), perform graph coloring such that interfering nodes are -// assigned to different colors. Returns the assigned color of the nodes, where -// the colors are represented as integer values [0, color_count). -std::vector ColorInterferenceGraph( - const std::vector>& interference_map) { - const int64 node_count = interference_map.size(); - - // Sort the nodes such that we assign nodes with more interference first. This - // relies on the common heuristic of assigning the most constrained node - // first, but it would be good to investigate other ordering heuristics too. - std::vector nodes(node_count); - std::iota(nodes.begin(), nodes.end(), 0); - std::sort(nodes.begin(), nodes.end(), - [&interference_map](const int64 i, const int64 j) { - return interference_map[i].size() > interference_map[j].size(); - }); - - const int64 kColorUnassigned = -1; - std::vector assigned_colors(node_count, kColorUnassigned); - for (int64 node : nodes) { - // Mark the colors that are already assigned to the neighbors. - std::vector available_colors(node_count, true); - for (int64 neighbor : interference_map[node]) { - int64 color = assigned_colors[neighbor]; - if (color != kColorUnassigned) { - available_colors[color] = false; - } - } - - // Find the color that is not yet assigned to the neighbors. 
- int64 color = kColorUnassigned; - for (color = 0; color < available_colors.size(); ++color) { - if (available_colors[color]) { - break; - } - } - CHECK_NE(color, kColorUnassigned); - assigned_colors[node] = color; - } - return assigned_colors; -} - -} // namespace - std::vector BufferAssigner::MergeColocatedBufferSets( const std::vector& colocated_buffer_sets, diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index cc195879a6..df73c28597 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -58,6 +58,45 @@ bool ValueIsReadOnly(const HloValue& value) { return IsConstantValue(value) || IsEntryParameterValue(value); } +// Data structure describing the action which should be taken on parts of a +// computation's buffers, with respect to the adding of special case copies. +struct SpecialCaseCopyPolicy { + // Insert a copy if the same buffer is found at multiple indices within the + // output tuple. + bool copy_root_replicated_buffers = false; + // If true, insert a copy if a buffer coming from a constant or a parameter + // is found within the output tuple. + bool copy_parameters_and_constants = false; +}; + +SpecialCaseCopyPolicy GetSpecialCaseCopyPolicy(const CallGraphNode& node, + HloModule* module, + HloComputation* computation) { + SpecialCaseCopyPolicy policy; + if (computation == module->entry_computation()) { + policy.copy_parameters_and_constants = true; + policy.copy_root_replicated_buffers = true; + } + for (const CallSite& site : node.caller_callsites()) { + // The kWhile instruction does not have any handling here, as the + // AddCopiesForWhile() API takes care of adding its own copies.
+ if (site.instruction()->opcode() == HloOpcode::kConditional) { + policy.copy_parameters_and_constants = true; + policy.copy_root_replicated_buffers = true; + } + } + return policy; +} + +bool ShouldCopyRootValue(const HloValue& value, + const SpecialCaseCopyPolicy& policy) { + if (policy.copy_parameters_and_constants) { + return IsConstantValue(value) || + value.defining_instruction()->opcode() == HloOpcode::kParameter; + } + return false; +} + // Deep copy the given instructions 'from' and 'to' at the ShapeIndexes given in // 'indices_to_copy'. Add control edges from the respective kCopy instructions // in deep copy of 'from' to the respective kCopy instruction in the deep copy @@ -957,7 +996,8 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { } TF_RET_CHECK(node.context() == CallContext::kSequential); - const bool is_entry = computation == module->entry_computation(); + SpecialCaseCopyPolicy policy = + GetSpecialCaseCopyPolicy(node, module, computation); HloInstruction* root = computation->root_instruction(); // Mark nondistinct/ambiguous indices. @@ -970,27 +1010,26 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { for (const HloBuffer* buffer : buffers_at_index) { buffer_seen_before |= !seen.insert(buffer).second; } - if (buffers_at_index.size() > 1 || (buffer_seen_before && is_entry)) { - VLOG(2) << "Index " << index << " of root of computation " + if (buffers_at_index.size() > 1 || + (buffer_seen_before && policy.copy_root_replicated_buffers)) { + VLOG(2) << "Index " << index << " of computation " << computation->name() << " (" << root->name() << ") has ambiguous or non-distinct buffer. Copying."; add_index_to_copy(root, index); } }); - // For entry instructions, mark any parameter or constant values. 
- if (is_entry) { - for (const auto& pair : - alias_analysis->dataflow_analysis().GetInstructionValueSet(root)) { - const ShapeIndex& index = pair.first; - const HloValueSet& value_set = pair.second; - for (const HloValue* value : value_set.values()) { - if (ValueIsReadOnly(*value)) { - VLOG(2) << "Root of entry computation (" << root->name() - << ") has constant or entry parameter value at index " - << index << ". Copying."; - add_index_to_copy(root, index); - } + for (const auto& pair : + alias_analysis->dataflow_analysis().GetInstructionValueSet(root)) { + const ShapeIndex& index = pair.first; + const HloValueSet& value_set = pair.second; + for (const HloValue* value : value_set.values()) { + if (ShouldCopyRootValue(*value, policy)) { + VLOG(2) << "Root of (" << root->name() << ") of computation(" + << computation->name() + << ") has constant or parameter value at index " << index + << ". Copying."; + add_index_to_copy(root, index); } } } @@ -1012,7 +1051,6 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { instruction->parent()->set_root_instruction(deep_copy); } } - return Status::OK(); } -- GitLab From dedace82ecf34c7906647361a811c8bf99f13da7 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 17:55:31 -0800 Subject: [PATCH 041/311] [XLA::Interpreter] Add support for kConditional to HloEvaluator. Also enable xla/tests/conditional_tests to run on interpreter. 
PiperOrigin-RevId: 187110438 --- .../compiler/xla/service/hlo_evaluator.cc | 28 +++++++++++++++++++ .../compiler/xla/service/hlo_evaluator.h | 2 ++ tensorflow/compiler/xla/tests/BUILD | 1 + 3 files changed, 31 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index cf8b35908f..afbfdac05e 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -2491,6 +2491,34 @@ Status HloEvaluator::HandleCall(HloInstruction* call) { return Status::OK(); } +Status HloEvaluator::HandleConditional(HloInstruction* conditional) { + const auto& pred = GetEvaluatedLiteralFor(conditional->operand(0)); + const auto& true_computation_arg = + GetEvaluatedLiteralFor(conditional->operand(1)); + const auto& false_computation_arg = + GetEvaluatedLiteralFor(conditional->operand(2)); + + auto* true_computation = conditional->true_computation(); + auto* false_computation = conditional->false_computation(); + + auto result = Literal::CreateFromShape(conditional->shape()); + HloEvaluator embedded_evaluator; + if (pred.Get({})) { + result = embedded_evaluator + .Evaluate(*true_computation, + {&true_computation_arg}) + .ConsumeValueOrDie(); + } else { + result = embedded_evaluator + .Evaluate(*false_computation, + {&false_computation_arg}) + .ConsumeValueOrDie(); + } + + evaluated_[conditional] = std::move(result); + return Status::OK(); +} + Status HloEvaluator::Preprocess(HloInstruction* hlo) { VLOG(2) << "About to visit HLO: " << hlo->ToString(); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index c65d9915e3..fc82011630 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -153,6 +153,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCopy(HloInstruction* copy) override; + Status 
HandleConditional(HloInstruction* conditional) override; + Status HandleCall(HloInstruction* call) override; private: diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index f3ecfc1604..19b3dfae4e 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -478,6 +478,7 @@ xla_test( xla_test( name = "conditional_test", srcs = ["conditional_test.cc"], + tags = ["enable_for_xla_interpreter"], deps = [ "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", -- GitLab From 4aa3d3ce252a9af2e09cdbd5460262ccb5378a3a Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 17:56:15 -0800 Subject: [PATCH 042/311] Support configurable stats publishers in the grpc server. PiperOrigin-RevId: 187110497 --- .../distributed_runtime/rpc/grpc_server_lib.cc | 15 ++++++++++++--- .../distributed_runtime/rpc/grpc_server_lib.h | 6 ++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc index c4ac92d809..a6f4be3eaf 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc @@ -106,7 +106,8 @@ GrpcServer::~GrpcServer() { Status GrpcServer::Init( ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func, - const WorkerCreationFunction& worker_func) { + const WorkerCreationFunction& worker_func, + const StatsPublisherFactory& stats_factory) { mutex_lock l(mu_); CHECK_EQ(state_, NEW); master_env_.env = env_; @@ -218,7 +219,7 @@ Status GrpcServer::Init( master_env_.ops = OpRegistry::Global(); master_env_.worker_cache = worker_cache; master_env_.master_session_factory = - [config]( + [config, stats_factory]( SessionOptions options, const MasterEnv* env, std::unique_ptr>> remote_devs, std::unique_ptr worker_cache, @@ -226,7 +227,7 
@@ Status GrpcServer::Init( options.config.MergeFrom(config); return new MasterSession(options, env, std::move(remote_devs), std::move(worker_cache), std::move(device_set), - CreateNoOpStatsPublisher); + stats_factory); }; master_env_.worker_cache_factory = [this](const WorkerCacheFactoryOptions& options, @@ -241,6 +242,14 @@ Status GrpcServer::Init( return Status::OK(); } +Status GrpcServer::Init( + ServiceInitFunction service_func, + const RendezvousMgrCreationFunction& rendezvous_mgr_func, + const WorkerCreationFunction& worker_func) { + return Init(std::move(service_func), rendezvous_mgr_func, worker_func, + CreateNoOpStatsPublisher); +} + Status GrpcServer::Init( ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func) { diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h index 8b12ac1461..7c2f06f618 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h @@ -22,6 +22,7 @@ limitations under the License. 
#include "grpc++/security/credentials.h" #include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/common_runtime/stats_publisher_interface.h" #include "tensorflow/core/distributed_runtime/master_env.h" #include "tensorflow/core/distributed_runtime/rpc/async_service_interface.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_channel.h" @@ -68,6 +69,11 @@ class GrpcServer : public ServerInterface { const string target() const override; protected: + Status Init(ServiceInitFunction service_func, + const RendezvousMgrCreationFunction& rendezvous_mgr_func, + const WorkerCreationFunction& worker_func, + const StatsPublisherFactory& stats_factory); + Status Init(ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func, const WorkerCreationFunction& worker_func); -- GitLab From 19f18e377d8ee2f624406527b21444128da344df Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 26 Feb 2018 18:04:55 -0800 Subject: [PATCH 043/311] Modify retrain script to output TFLite compatible quantized models. -Also fix flaky input name selection introduced by last PR. -Also rely on tf.contrib.quantize to do graph transformations. -Also, update retrain script to use new float mobilenet_v1 and quantized mobilenet_v1 models. 
PiperOrigin-RevId: 187111533 --- .../examples/image_retraining/retrain.py | 317 +++++++++++------- .../examples/image_retraining/retrain_test.py | 44 ++- 2 files changed, 229 insertions(+), 132 deletions(-) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index 25e09fecbf..99a71206ac 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -75,13 +75,16 @@ python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos --architecture mobilenet_1.0_224 ``` -Run quantized version of mobilenet: +Run mobilenet, instrumented for quantization: ```bash python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quantized + --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quant ``` +These instrumented models can be converted to fully quantized mobile models via +TensorFlow Lite. + There are 32 different Mobilenet models to choose from, with a variety of file size and latency options. The first number can be '1.0', '0.75', '0.50', or '0.25' to control the size, and the second controls the input image size, either @@ -121,7 +124,6 @@ import numpy as np from six.moves import urllib import tensorflow as tf -from tensorflow.contrib.quantize.python import quant_ops from tensorflow.python.framework import graph_util from tensorflow.python.framework import tensor_shape from tensorflow.python.platform import gfile @@ -135,6 +137,9 @@ FLAGS = None # need to update these to reflect the values in the network you're using. MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1 # ~134M +# The location where variable checkpoints will be stored. +CHECKPOINT_NAME = '/tmp/_retrain_checkpoint' + def create_image_lists(image_dir, testing_percentage, validation_percentage): """Builds a list of training images from the file system. 
@@ -745,9 +750,9 @@ def variable_summaries(var): tf.summary.histogram('histogram', var) -def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, - bottleneck_tensor_size, quantize_layer): - """Adds a new softmax and fully-connected layer for training. +def add_final_retrain_ops(class_count, final_tensor_name, bottleneck_tensor, + bottleneck_tensor_size, quantize_layer, is_training): + """Adds a new softmax and fully-connected layer for training and eval. We need to retrain the top layer to identify our new classes, so this function adds the right operations to the graph, along with some variables to hold the @@ -763,7 +768,9 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, bottleneck_tensor: The output of the main CNN graph. bottleneck_tensor_size: How many entries in the bottleneck vector. quantize_layer: Boolean, specifying whether the newly added layer should be - quantized. + instrumented for quantization. + is_training: Boolean, specifying whether the newly add layer is for training + or eval. Returns: The tensors for the training and cross entropy results, and tensors for the @@ -778,50 +785,41 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, ground_truth_input = tf.placeholder( tf.int64, [None], name='GroundTruthInput') - # Organizing the following ops as `final_training_ops` so they're easier - # to see in TensorBoard - layer_name = 'final_training_ops' + # Organizing the following ops so they are easier to see in TensorBoard.
+ layer_name = 'final_retrain_ops' with tf.name_scope(layer_name): with tf.name_scope('weights'): initial_value = tf.truncated_normal( [bottleneck_tensor_size, class_count], stddev=0.001) layer_weights = tf.Variable(initial_value, name='final_weights') - if quantize_layer: - quantized_layer_weights = quant_ops.MovingAvgQuantize( - layer_weights, is_training=True) - variable_summaries(quantized_layer_weights) - variable_summaries(layer_weights) + with tf.name_scope('biases'): layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases') - if quantize_layer: - quantized_layer_biases = quant_ops.MovingAvgQuantize( - layer_biases, is_training=True) - variable_summaries(quantized_layer_biases) - variable_summaries(layer_biases) with tf.name_scope('Wx_plus_b'): - if quantize_layer: - logits = tf.matmul(bottleneck_input, - quantized_layer_weights) + quantized_layer_biases - logits = quant_ops.MovingAvgQuantize( - logits, - init_min=-32.0, - init_max=32.0, - is_training=True, - num_bits=8, - narrow_range=False, - ema_decay=0.5) - tf.summary.histogram('pre_activations', logits) - else: - logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases - tf.summary.histogram('pre_activations', logits) + logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases + tf.summary.histogram('pre_activations', logits) final_tensor = tf.nn.softmax(logits, name=final_tensor_name) + # The tf.contrib.quantize functions rewrite the graph in place for + # quantization. The imported model graph has already been rewritten, so upon + # calling these rewrites, only the newly added final layer will be + # transformed. + if quantize_layer: + if is_training: + tf.contrib.quantize.create_training_graph() + else: + tf.contrib.quantize.create_eval_graph() + tf.summary.histogram('activations', final_tensor) + # If this is an eval graph, we don't need to add loss ops or an optimizer. 
+ if not is_training: + return None, None, bottleneck_input, ground_truth_input, final_tensor + with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy( labels=ground_truth_input, logits=logits) @@ -857,13 +855,91 @@ def add_evaluation_step(result_tensor, ground_truth_tensor): return evaluation_step, prediction -def save_graph_to_file(sess, graph, graph_file_name): +def run_final_eval(sess, model_info, class_count, image_lists, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor): + """Runs a final evaluation on an eval graph using the test data set. + + Args: + sess: Session for the train graph. + model_info: Model info dictionary from create_model_info() + class_count: Number of classes + image_lists: Dictionary of training images for each label. + jpeg_data_tensor: The layer to feed jpeg image data into. + decoded_image_tensor: The output of decoding and resizing the image. + resized_image_tensor: The input node of the recognition graph. + bottleneck_tensor: The bottleneck output layer of the CNN graph. 
+ """ + (sess, bottleneck_input, ground_truth_input, evaluation_step, + prediction) = build_eval_session(model_info, class_count) + + test_bottlenecks, test_ground_truth, test_filenames = ( + get_random_cached_bottlenecks(sess, image_lists, FLAGS.test_batch_size, + 'testing', FLAGS.bottleneck_dir, + FLAGS.image_dir, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor, FLAGS.architecture)) + test_accuracy, predictions = sess.run( + [evaluation_step, prediction], + feed_dict={ + bottleneck_input: test_bottlenecks, + ground_truth_input: test_ground_truth + }) + tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % + (test_accuracy * 100, len(test_bottlenecks))) + + if FLAGS.print_misclassified_test_images: + tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') + for i, test_filename in enumerate(test_filenames): + if predictions[i] != test_ground_truth[i]: + tf.logging.info('%70s %s' % (test_filename, + list(image_lists.keys())[predictions[i]])) + + +def build_eval_session(model_info, class_count): + """Builds an restored eval session without train operations for exporting. + + Args: + model_info: Model info dictionary from create_model_info() + class_count: Number of classes + + Returns: + Eval session containing the restored eval graph. + The bottleneck input, ground truth, eval step, and prediction tensors. + """ + # If quantized, we need to create the correct eval graph for exporting. + eval_graph, bottleneck_tensor, _ = create_model_graph(model_info) + + eval_sess = tf.Session(graph=eval_graph) + with eval_graph.as_default(): + # Add the new layer for exporting. + (_, _, bottleneck_input, + ground_truth_input, final_tensor) = add_final_retrain_ops( + class_count, FLAGS.final_tensor_name, bottleneck_tensor, + model_info['bottleneck_tensor_size'], model_info['quantize_layer'], + False) + + # Now we need to restore the values from the training graph to the eval + # graph. 
+ tf.train.Saver().restore(eval_sess, CHECKPOINT_NAME) + + evaluation_step, prediction = add_evaluation_step(final_tensor, + ground_truth_input) + + return (eval_sess, bottleneck_input, ground_truth_input, evaluation_step, + prediction) + + +def save_graph_to_file(graph, graph_file_name, model_info, class_count): + """Saves a graph to file, creating a valid quantized one if necessary.""" + sess, _, _, _, _ = build_eval_session(model_info, class_count) + graph = sess.graph + output_graph_def = graph_util.convert_variables_to_constants( sess, graph.as_graph_def(), [FLAGS.final_tensor_name]) with gfile.FastGFile(graph_file_name, 'wb') as f: f.write(output_graph_def.SerializeToString()) - return def prepare_file_system(): @@ -916,11 +992,10 @@ def create_model_info(architecture): return None version_string = parts[1] if (version_string != '1.0' and version_string != '0.75' and - version_string != '0.50' and version_string != '0.25'): + version_string != '0.5' and version_string != '0.25'): tf.logging.error( - """"The Mobilenet version should be '1.0', '0.75', '0.50', or '0.25', - but found '%s' for architecture '%s'""", - version_string, architecture) + """"The Mobilenet version should be '1.0', '0.75', '0.5', or '0.25', + but found '%s' for architecture '%s'""", version_string, architecture) return None size_string = parts[2] if (size_string != '224' and size_string != '192' and @@ -933,35 +1008,26 @@ def create_model_info(architecture): if len(parts) == 3: is_quantized = False else: - if parts[3] != 'quantized': + if parts[3] != 'quant': tf.logging.error( "Couldn't understand architecture suffix '%s' for '%s'", parts[3], architecture) return None is_quantized = True + data_url = 'http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/' + model_name = 'mobilenet_v1_' + version_string + '_' + size_string if is_quantized: - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_quantized_frozen.tgz'
- bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'Placeholder:0' - model_dir_name = ('mobilenet_v1_' + version_string + '_' + size_string + - '_quantized_frozen') - model_base_name = 'quantized_frozen_graph.pb' - - else: - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_frozen.tgz' - bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'input:0' - model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string - model_base_name = 'frozen_graph.pb' + model_name += '_quant' + data_url += model_name + '.tgz' + bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + resized_input_tensor_name = 'input:0' + model_file_name = model_name + '_frozen.pb' bottleneck_tensor_size = 1001 input_width = int(size_string) input_height = int(size_string) input_depth = 3 - model_file_name = os.path.join(model_dir_name, model_base_name) input_mean = 127.5 input_std = 127.5 else: @@ -1011,43 +1077,45 @@ def add_jpeg_decoding(input_width, input_height, input_depth, input_mean, return jpeg_data, mul_image -def export_model(sess, architecture, saved_model_dir): +def export_model(model_info, class_count, saved_model_dir): """Exports model for serving. Args: - sess: Current active TensorFlow Session. - architecture: Model architecture. + model_info: The modelinfo for the current model. + class_count: The number of classes. saved_model_dir: Directory in which to save exported model and variables. 
""" - if architecture == 'inception_v3': - input_tensor = 'DecodeJpeg/contents:0' - elif architecture.startswith('mobilenet_'): - input_tensor = 'input:0' - else: - raise ValueError('Unknown architecture', architecture) - in_image = sess.graph.get_tensor_by_name(input_tensor) - inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} - - out_classes = sess.graph.get_tensor_by_name('final_result:0') - outputs = {'prediction': tf.saved_model.utils.build_tensor_info(out_classes)} + # The SavedModel should hold the eval graph. + sess, _, _, _, _ = build_eval_session(model_info, class_count) + graph = sess.graph + with graph.as_default(): + input_tensor = model_info['resized_input_tensor_name'] + in_image = sess.graph.get_tensor_by_name(input_tensor) + inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} + + out_classes = sess.graph.get_tensor_by_name('final_result:0') + outputs = { + 'prediction': tf.saved_model.utils.build_tensor_info(out_classes) + } - signature = tf.saved_model.signature_def_utils.build_signature_def( - inputs=inputs, - outputs=outputs, - method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) + signature = tf.saved_model.signature_def_utils.build_signature_def( + inputs=inputs, + outputs=outputs, + method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) - legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') + legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') - # Save out the SavedModel. - builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) - builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - signature - }, - legacy_init_op=legacy_init_op) - builder.save() + # Save out the SavedModel. 
+ builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) + builder.add_meta_graph_and_variables( + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.saved_model.signature_constants. + DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature + }, + legacy_init_op=legacy_init_op) + builder.save() def main(_): @@ -1064,11 +1132,6 @@ def main(_): tf.logging.error('Did not recognize architecture flag') return -1 - # Set up the pre-trained graph. - maybe_download_and_extract(model_info['data_url']) - graph, bottleneck_tensor, resized_image_tensor = ( - create_model_graph(model_info)) - # Look at the folder structure, and create lists of all the images. image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage, FLAGS.validation_percentage) @@ -1087,6 +1150,19 @@ def main(_): FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale, FLAGS.random_brightness) + # Set up the pre-trained graph. + maybe_download_and_extract(model_info['data_url']) + graph, bottleneck_tensor, resized_image_tensor = ( + create_model_graph(model_info)) + + # Add the new layer that we'll be training. + with graph.as_default(): + (train_step, cross_entropy, bottleneck_input, + ground_truth_input, final_tensor) = add_final_retrain_ops( + class_count, FLAGS.final_tensor_name, bottleneck_tensor, + model_info['bottleneck_tensor_size'], model_info['quantize_layer'], + True) + with tf.Session(graph=graph) as sess: # Set up the image decoding sub-graph. jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding( @@ -1110,15 +1186,8 @@ def main(_): decoded_image_tensor, resized_image_tensor, bottleneck_tensor, FLAGS.architecture) - # Add the new layer that we'll be training. 
- (train_step, cross_entropy, bottleneck_input, ground_truth_input, - final_tensor) = add_final_training_ops( - len(image_lists.keys()), FLAGS.final_tensor_name, bottleneck_tensor, - model_info['bottleneck_tensor_size'], model_info['quantize_layer']) - # Create the operations we need to evaluate the accuracy of our new layer. - evaluation_step, prediction = add_evaluation_step( - final_tensor, ground_truth_input) + evaluation_step, _ = add_evaluation_step(final_tensor, ground_truth_input) # Merge all the summaries and write them out to the summaries_dir merged = tf.summary.merge_all() @@ -1128,6 +1197,10 @@ def main(_): validation_writer = tf.summary.FileWriter( FLAGS.summaries_dir + '/validation') + # Create a train saver that is used to restore values into an eval graph + # when exporting models. + train_saver = tf.train.Saver() + # Set up all our weights to their initial default values. init = tf.global_variables_initializer() sess.run(init) @@ -1168,6 +1241,9 @@ def main(_): (datetime.now(), i, train_accuracy * 100)) tf.logging.info('%s: Step %d: Cross entropy = %f' % (datetime.now(), i, cross_entropy_value)) + # TODO(suharshs): Make this use an eval graph, to avoid quantization + # moving averages being updated by the validation set, though in + # practice this makes a negligible difference. validation_bottlenecks, validation_ground_truth, _ = ( get_random_cached_bottlenecks( sess, image_lists, FLAGS.validation_batch_size, 'validation', @@ -1190,42 +1266,32 @@ def main(_): if (intermediate_frequency > 0 and (i % intermediate_frequency == 0) and i > 0): + # If we want to do an intermediate save, save a checkpoint of the train + # graph, to restore into the eval graph.
+ train_saver.save(sess, CHECKPOINT_NAME) intermediate_file_name = (FLAGS.intermediate_output_graphs_dir + 'intermediate_' + str(i) + '.pb') tf.logging.info('Save intermediate result to : ' + intermediate_file_name) - save_graph_to_file(sess, graph, intermediate_file_name) + save_graph_to_file(graph, intermediate_file_name, model_info, + class_count) + + # After training is complete, force one last save of the train checkpoint. + train_saver.save(sess, CHECKPOINT_NAME) # We've completed all our training, so run a final test evaluation on # some new images we haven't used before. - test_bottlenecks, test_ground_truth, test_filenames = ( - get_random_cached_bottlenecks( - sess, image_lists, FLAGS.test_batch_size, 'testing', - FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, bottleneck_tensor, - FLAGS.architecture)) - test_accuracy, predictions = sess.run( - [evaluation_step, prediction], - feed_dict={bottleneck_input: test_bottlenecks, - ground_truth_input: test_ground_truth}) - tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % - (test_accuracy * 100, len(test_bottlenecks))) - - if FLAGS.print_misclassified_test_images: - tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') - for i, test_filename in enumerate(test_filenames): - if predictions[i] != test_ground_truth[i]: - tf.logging.info('%70s %s' % - (test_filename, - list(image_lists.keys())[predictions[i]])) + run_final_eval(sess, model_info, class_count, image_lists, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor) # Write out the trained graph and labels with the weights stored as # constants. 
- save_graph_to_file(sess, graph, FLAGS.output_graph) + save_graph_to_file(graph, FLAGS.output_graph, model_info, class_count) with gfile.FastGFile(FLAGS.output_labels, 'w') as f: f.write('\n'.join(image_lists.keys()) + '\n') - export_model(sess, FLAGS.architecture, FLAGS.saved_model_dir) + export_model(model_info, class_count, FLAGS.saved_model_dir) if __name__ == '__main__': @@ -1406,8 +1472,9 @@ if __name__ == '__main__': form 'mobilenet__[_quantized]'. For example, 'mobilenet_1.0_224' will pick a model that is 17 MB in size and takes 224 pixel input images, while 'mobilenet_0.25_128_quantized' will choose a much - less accurate, but smaller and faster network that's 920 KB on disk and - takes 128x128 images. See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html + smaller and less accurate model, taking 128x128 images, and instrumented + for eventual quantization via TensorFlow Lite. + See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html for more information on Mobilenet.\ """) parser.add_argument( diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py index 8b8dd45fd7..fb7324c58a 100644 --- a/tensorflow/examples/image_retraining/retrain_test.py +++ b/tensorflow/examples/image_retraining/retrain_test.py @@ -67,22 +67,52 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): self.assertIsNotNone(sess.graph.get_tensor_by_name('DistortResult:0')) @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOps(self, flags_mock): + def testAddFinalRetrainOps(self, flags_mock): with tf.Graph().as_default(): with tf.Session() as sess: bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, False) + # Test creating final training op without quantization.
+ retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, False, + False) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOpsQuantized(self, flags_mock): - with tf.Graph().as_default(): + def testAddFinalRetrainOpsQuantized(self, flags_mock): + # Ensure that the training and eval graph for quantized models are correctly + # created. + with tf.Graph().as_default() as g: + with tf.Session() as sess: + bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') + # Test creating final training op with quantization, set is_training to + # true. + retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, True, True) + self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) + found_fake_quant = 0 + for op in g.get_operations(): + if op.type == 'FakeQuantWithMinMaxVars': + found_fake_quant += 1 + # Ensure that the inputs of each FakeQuant operations has 2 Assign + # operations in the training graph (Assign[Min,Max]Last, + # Assign[Min,Max]Ema) + self.assertEqual(2, + len([i for i in op.inputs if 'Assign' in i.name])) + self.assertEqual(found_fake_quant, 2) + with tf.Graph().as_default() as g: with tf.Session() as sess: bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, True) + # Test creating final training op with quantization, set is_training to + # false. + retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, True, False) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) + found_fake_quant = 0 + for op in g.get_operations(): + if op.type == 'FakeQuantWithMinMaxVars': + found_fake_quant += 1 + for i in op.inputs: + # Ensure that no operations are Assign operation since this is the + # evaluation graph. 
+ self.assertTrue('Assign' not in i.name) + self.assertEqual(found_fake_quant, 2) def testAddEvaluationStep(self): with tf.Graph().as_default(): -- GitLab From 60a4b676df017b4ac51ca84a5e5e3a998912cebc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 18:05:59 -0800 Subject: [PATCH 044/311] Remove old implementation of the adaptive shared batcher, the in flight batches implementation delivers similar performance but is simpler and requires less tuning. PiperOrigin-RevId: 187111685 --- .../adaptive_shared_batch_scheduler.h | 172 +----- .../adaptive_shared_batch_scheduler_test.cc | 488 +++++------------- 2 files changed, 140 insertions(+), 520 deletions(-) diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h index 25c5f9cf42..661ed239d3 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h @@ -50,43 +50,26 @@ class ASBSQueue; // track of a number of queues (one per model or model version) which are // continuously enqueuing requests. The scheduler groups the requests into // batches which it periodically sends off for processing (see -// shared_batch_scheduler.h for more details). The AdaptiveSharedBatchScheduler -// prioritizes batches by age (i.e. the batch's oldest request) irrespective of -// queue or batch size. +// shared_batch_scheduler.h for more details). AdaptiveSharedBatchScheduler +// (ASBS) prioritizes batches by age (i.e. the batch's oldest request) +// irrespective of queue or batch size. // -// The scheduling decision currently exists in two flavors, controlled by the -// option use_in_flight_batches_implementation. It is expected that setting this -// option to true will give universally better results; after a period of -// testing to confirm, the old implementation will be removed.
-// -// If use_in_flight_batches_implementation is set to true, the scheduler -// limits the number of batches which can be processed concurrently. If a new -// batch is created, and the number of in flight batches is below the limit, -// the next (i.e. oldest) batch is immediately scheduled. Similarly, when a -// batch finishes processing, the limit is rechecked, and another batch may be -// scheduled. To avoid the need to carefully tune the limit for workload, -// model type, platform, etc, it is dynamically adjusted in order to provide the -// lowest latency. -// -// If use_in_flight_batches_implementation is set to false, the scheduler will -// process the oldest batch at an adjustable rate, regardless of batch size. -// The user can provide feedback to help set this rate to achieve some goal -// (i.e. minimize overall latency, limit cpu usage, etc). The rate (or rather, -// the corresponding period) is adjusted each time a batch is processed, using -// an exponentially weighted moving average to smooth noisy feedback: -// ewma_feedback = ((N - 1) * ewma_feedback + feedback()) / N -// period *= (1 + K * emwa_feedback) +// ASBS tries to keep the system busy by maintaining an adjustable number of +// concurrently processed batches. If a new batch is created, and the number of +// in flight batches is below the target, the next (i.e. oldest) batch is +// immediately scheduled. Similarly, when a batch finishes processing, the +// target is rechecked, and another batch may be scheduled. To avoid the need +// to carefully tune the target for workload, model type, platform, etc, it is +// dynamically adjusted in order to provide the lowest average latency. // // Some potential use cases: // Hardware Accelerators (GPUs & TPUs) - If some phase of batch processing // involves serial processing by a device, from a latency perspective it is // desirable to keep the device evenly loaded, avoiding the need to wait for // the device to process prior batches. 
-// feedback = num_pending_on_device() - desired_pending. // CPU utilization - If the batch processing is cpu dominated, you can reap // latency gains when underutilized by increasing the processing rate, but // back the rate off when the load increases to avoid overload. -// feedback = cpu_rate() - desired_cpu_rate. template class AdaptiveSharedBatchScheduler @@ -101,13 +84,17 @@ class AdaptiveSharedBatchScheduler struct Options { // The name to use for the pool of batch threads. string thread_pool_name = {"batch_threads"}; - // Number of batch processing threads; equivalently the maximum number of - // concurrently running batches. + // Number of batch processing threads - the maximum value of + // in_flight_batches_limit_. It is recommended that this value be set by + // running the system under load, observing the learned value for + // in_flight_batches_limit_, and setting this maximum to ~ 2x the value. + // Under low load, in_flight_batches_limit_ has no substantial effect on + // latency and therefore undergoes a random walk. Unreasonably large values + // for num_batch_threads allows for large in_flight_batches_limit_, which + // will harm latency for some time once load increases again. int64 num_batch_threads = port::NumSchedulableCPUs(); // The environment to use (typically only overridden by test code). Env* env = Env::Default(); - // Which implementation to use (described in class comments above). - bool use_in_flight_batches_implementation = false; // Initial limit for number of batches being concurrently processed. // Non-integer values correspond to probabilistic limits - i.e. a value of // 3.2 results in an actual cap of 3 80% of the time, and 4 20% of the time. @@ -116,28 +103,6 @@ class AdaptiveSharedBatchScheduler // numbers will give less noisy latency measurements, but will be less // responsive to changes in workload. 
int64 batches_to_average_over = 1000; - - // TODO(kte): remove the rate based implementation and corresponding options - // below once testing confirms the superiority of the in flight batches - // implementation. - // Initial batch scheduling period in microseconds. Will be altered for - // non-zero rate_feedback. - double initial_scheduling_period_micros = 500; - // Minimum batch scheduling period in microseconds. Recommend setting this - // value greater than 0, otherwise it may take a while to recover from a - // sustained time of negative scheduling_period_feedback (which may occur - // under low load). - double min_scheduling_period_micros = 100; - // Maximum batch scheduling period in microseconds. - double max_scheduling_period_micros = 10000; - // Feedback function used to modify the scheduling period each time a batch - // is scheduled. Should return values roughly O(1), with positive values - // resulting in an increased period. - std::function scheduling_period_feedback{[] { return 0.; }}; - // To handle potentially noisy scheduling_period_feedback, the period is - // adjusted using an exponentially weighted moving average over the previous - // feedback_smoothing_batches batches. Must be greater than 0. - int64 feedback_smoothing_batches = 10; }; // Ownership is shared between the caller of Create() and any queues created @@ -171,17 +136,11 @@ class AdaptiveSharedBatchScheduler explicit AdaptiveSharedBatchScheduler(const Options& options); - // Batch scheduling function which runs every scheduling_period_ microseconds. - // Only used when options_.use_in_flight_batches_implementation == false. - void ProcessOneBatch(); - // Tracks processing latency and adjusts in_flight_batches_limit to minimize. - // Only used when options_.use_in_flight_batches_implementation == true. void CallbackWrapper(const internal::ASBSBatch* batch, BatchProcessor callback); // Schedules batch if in_flight_batches_limit_ is not met. 
- // Only used when options_.use_in_flight_batches_implementation == true. void MaybeScheduleNextBatch() EXCLUSIVE_LOCKS_REQUIRED(mu_); // Notifies scheduler of non-empty batch which is eligible for processing. @@ -212,41 +171,22 @@ class AdaptiveSharedBatchScheduler mutex mu_; - // Responsible for running ProcessOneBatch. PeriodicFunction was used in order - // to check for deletion so that the thread can be shut down. - // Only used when options_.use_in_flight_batches_implementation == false. - std::unique_ptr scheduling_thread_; - // Responsible for running the batch processing callbacks. std::unique_ptr batch_thread_pool_; - // Time interval in microseconds between successive ProcessOneBatch calls. - // Only used when options_.use_in_flight_batches_implementation == false. - double scheduling_period_; - - // Exponentially weighted moving average of - // options_.scheduling_period_feedback() evaluated in each ProcessOneBatch - // call. - // Only used when options_.use_in_flight_batches_implementation == false. - double ewma_feedback_ = 0; - // Limit on number of batches which can be concurrently processed. // Non-integer values correspond to probabilistic limits - i.e. a value of 3.2 // results in an actual cap of 3 80% of the time, and 4 20% of the time. - // Only used when options_.use_in_flight_batches_implementation == true. double in_flight_batches_limit_ GUARDED_BY(mu_); // Number of batches currently being processed. - // Only used when options_.use_in_flight_batches_implementation == true. int64 in_flight_batches_ GUARDED_BY(mu_) = 0; // RNG engine and distribution. - // Only used when options_.use_in_flight_batches_implementation == true. std::default_random_engine rand_engine_; std::uniform_real_distribution rand_double_; // Fields controlling the dynamic adjustment of in_flight_batches_limit_. - // Only used when options_.use_in_flight_batches_implementation == true. // Number of batches since the last in_flight_batches_limit_ adjustment. 
int64 batch_count_ GUARDED_BY(mu_) = 0; // Sum of processing latency for batches counted by batch_count_. @@ -348,32 +288,6 @@ Status AdaptiveSharedBatchScheduler::Create( return errors::InvalidArgument("num_batch_threads must be positive; was ", options.num_batch_threads); } - if (options.min_scheduling_period_micros < 0) { - return errors::InvalidArgument( - "min_scheduling_period_micros must be >= 0; was ", - options.min_scheduling_period_micros); - } - if (options.min_scheduling_period_micros > - options.initial_scheduling_period_micros) { - return errors::InvalidArgument( - "initial_scheduling_period_micros (", - options.initial_scheduling_period_micros, - ") must be >= min_scheduling_period_micros (", - options.min_scheduling_period_micros, ")"); - } - if (options.initial_scheduling_period_micros > - options.max_scheduling_period_micros) { - return errors::InvalidArgument( - "initial_scheduling_period_micros (", - options.initial_scheduling_period_micros, - ") must be <= max_scheduling_period_micros (", - options.max_scheduling_period_micros, ")"); - } - if (options.feedback_smoothing_batches < 1) { - return errors::InvalidArgument( - "feedback_smoothing_batches must be positive; was ", - options.feedback_smoothing_batches); - } if (options.initial_in_flight_batches_limit > options.num_batch_threads) { return errors::InvalidArgument( "initial_in_flight_batches_limit (", @@ -401,20 +315,12 @@ template AdaptiveSharedBatchScheduler::AdaptiveSharedBatchScheduler( const Options& options) : options_(options), - scheduling_period_(options.initial_scheduling_period_micros), in_flight_batches_limit_(options.initial_in_flight_batches_limit), rand_double_(0.0, 1.0) { std::random_device device; rand_engine_.seed(device()); - PeriodicFunction::Options opts; - opts.thread_name_prefix = "scheduling_thread"; - opts.env = GetEnv(); batch_thread_pool_.reset(new thread::ThreadPool( GetEnv(), options.thread_pool_name, options.num_batch_threads)); - if 
(!options.use_in_flight_batches_implementation) { - scheduling_thread_.reset( - new PeriodicFunction([this] { ProcessOneBatch(); }, 0, opts)); - } } template @@ -443,9 +349,7 @@ void AdaptiveSharedBatchScheduler::AddBatch( const internal::ASBSBatch* batch) { mutex_lock l(mu_); batches_.push(batch); - if (options_.use_in_flight_batches_implementation) { - MaybeScheduleNextBatch(); - } + MaybeScheduleNextBatch(); } template @@ -523,44 +427,6 @@ void AdaptiveSharedBatchScheduler::CallbackWrapper( MaybeScheduleNextBatch(); } -template -void AdaptiveSharedBatchScheduler::ProcessOneBatch() { - static const double kFeedbackMultiplier = .001; - const internal::ASBSBatch* batch = nullptr; - BatchProcessor callback; - const int64 start_time_micros = GetEnv()->NowMicros(); - { - mutex_lock l(mu_); - if (!batches_.empty()) { - batch = batches_.top(); - batches_.pop(); - callback = queues_and_callbacks_[batch->queue()]; - } - } - if (batch != nullptr) { - double feedback = options_.scheduling_period_feedback(); - const int64 N = options_.feedback_smoothing_batches; - ewma_feedback_ = ((N - 1) * ewma_feedback_ + feedback) / N; - scheduling_period_ *= (1 + kFeedbackMultiplier * ewma_feedback_); - if (scheduling_period_ < options_.min_scheduling_period_micros) { - scheduling_period_ = options_.min_scheduling_period_micros; - } else if (scheduling_period_ > options_.max_scheduling_period_micros) { - scheduling_period_ = options_.max_scheduling_period_micros; - } - // Queue may destroy itself after ReleaseBatch is called. 
- batch->queue()->ReleaseBatch(batch); - batch_thread_pool_->Schedule([callback, batch] { - callback(std::unique_ptr>( - const_cast*>(batch))); - }); - } - const int64 sleep_time = - scheduling_period_ - (GetEnv()->NowMicros() - start_time_micros); - if (sleep_time > 0) { - GetEnv()->SleepForMicroseconds(sleep_time); - } -} - template bool AdaptiveSharedBatchScheduler::BatchCompare::operator()( const internal::ASBSBatch* a, diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc index 8ae8ca02ec..109234287e 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc @@ -64,59 +64,6 @@ std::unique_ptr CreateFakeClockAdvancerThread( })); } -TEST(AdaptiveSharedBatchSchedulerTest, Basic) { - for (const bool delete_scheduler_early : {false, true}) { - for (const bool delete_queue_1_early : {false, true}) { - int queue_0_tasks = 0; - auto queue_0_callback = - [&queue_0_tasks](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_0_tasks += batch->task(i).size(); - } - }; - int queue_1_tasks = 0; - auto queue_1_callback = - [&queue_1_tasks](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_1_tasks += batch->task(i).size(); - } - }; - { - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create({}, &scheduler)); - - // Create two queues. - std::unique_ptr> queue_0; - TF_ASSERT_OK(scheduler->AddQueue({}, queue_0_callback, &queue_0)); - std::unique_ptr> queue_1; - TF_ASSERT_OK(scheduler->AddQueue({}, queue_1_callback, &queue_1)); - - if (delete_scheduler_early) { - // Delete our copy of the scheduler. 
The queues should keep it alive - // under the covers. - scheduler = nullptr; - } - // Submit tasks to the two queues, and (optionally) remove the queues. - TF_ASSERT_OK(ScheduleTask(1, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(2, queue_1.get())); - TF_ASSERT_OK(ScheduleTask(3, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(4, queue_1.get())); - if (delete_queue_1_early) { - queue_1 = nullptr; - } - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - } - EXPECT_EQ(queue_0_tasks, 9); - EXPECT_EQ(queue_1_tasks, 6); - } - } -} - TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { using Scheduler = AdaptiveSharedBatchScheduler; std::shared_ptr scheduler; @@ -124,24 +71,6 @@ TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { options.num_batch_threads = 0; EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); options = Scheduler::Options(); - options.min_scheduling_period_micros = 50; - options.max_scheduling_period_micros = 100; - options.initial_scheduling_period_micros = 1; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.min_scheduling_period_micros = 50; - options.max_scheduling_period_micros = 100; - options.initial_scheduling_period_micros = 1000; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.min_scheduling_period_micros = 100; - options.max_scheduling_period_micros = 50; - options.initial_scheduling_period_micros = 75; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.feedback_smoothing_batches = 0; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); options.initial_in_flight_batches_limit = 0.5; EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); options = Scheduler::Options(); @@ -153,301 +82,8 @@ TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); } 
-TEST(AdaptiveSharedBatchSchedulerTest, ObeysQueueOptions) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue_0; - std::unique_ptr> queue_1; - int queue_0_tasks = 0; - int queue_1_tasks = 0; - auto queue_0_callback = [&queue_0_tasks, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_0_tasks += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - auto queue_1_callback = [&queue_1_tasks, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_1_tasks += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - AdaptiveSharedBatchScheduler::QueueOptions queue_options; - queue_options.max_batch_size = 10; - queue_options.max_enqueued_batches = 0; - // Queue must have max_enqueued_batchs > 1. - EXPECT_FALSE( - scheduler->AddQueue(queue_options, queue_0_callback, &queue_0).ok()); - queue_options.max_enqueued_batches = 2; - TF_ASSERT_OK( - scheduler->AddQueue(queue_options, queue_0_callback, &queue_0)); - EXPECT_EQ(10, queue_0->max_task_size()); - queue_options.max_batch_size = 0; - // Queue must have max_batch_size > 0. - EXPECT_FALSE( - scheduler->AddQueue(queue_options, queue_1_callback, &queue_1).ok()); - queue_options.max_batch_size = 2; - queue_options.max_enqueued_batches = 1; - TF_ASSERT_OK( - scheduler->AddQueue(queue_options, queue_1_callback, &queue_1)); - - // Wait for scheduling_thread to sleep. 
- env.BlockUntilThreadsAsleep(1); - // Task larger than max_batch_size shouldn't schedule. - EXPECT_FALSE(ScheduleTask(15, queue_0.get()).ok()); - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - env.AdvanceByMicroseconds(1); - - // Task larger than max_batch_size shouldn't schedule. - EXPECT_FALSE(ScheduleTask(3, queue_1.get()).ok()); - TF_ASSERT_OK(ScheduleTask(1, queue_1.get())); - TF_ASSERT_OK(ScheduleTask(1, queue_1.get())); - env.AdvanceByMicroseconds(1); - // Exceeds max_enqueued_batches, shouldn't schedule. - EXPECT_FALSE(ScheduleTask(1, queue_1.get()).ok()); - - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - // Exceeds max_enqueued_batches, shouldn't schedule. - EXPECT_FALSE(ScheduleTask(6, queue_0.get()).ok()); - TF_ASSERT_OK(ScheduleTask(4, queue_0.get())); - - // Batches should be processed in order from oldest to newest. - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 10); - EXPECT_EQ(queue_1_tasks, 0); - - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 10); - EXPECT_EQ(queue_1_tasks, 2); - - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 19); - EXPECT_EQ(queue_1_tasks, 2); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, RateFeedback) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - double feedback = 0; - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.min_scheduling_period_micros = 200; - options.max_scheduling_period_micros = 2000; - options.env = &env; - options.scheduling_period_feedback = [&feedback] { return feedback; }; - options.feedback_smoothing_batches = 1; - std::shared_ptr> scheduler; 
- TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - - TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 6 batches. - for (int i = 0; i < 6; i++) { - TF_ASSERT_OK(ScheduleTask(900 + i, queue.get())); - env.AdvanceByMicroseconds(1); - } - feedback = -500; - env.AdvanceByMicroseconds(994); - env.BlockUntilThreadsAsleep(2); // scheduling period = 500 usec. - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(500); - env.BlockUntilThreadsAsleep(2); // scheduling period = 250 usec. - EXPECT_EQ(scheduled_items, 901); - feedback = 0; - env.AdvanceByMicroseconds(250); - env.BlockUntilThreadsAsleep(2); // scheduling period = 250 usec. - EXPECT_EQ(scheduled_items, 902); - feedback = 10000; // large feedback should hit max_scheduling_period. - env.AdvanceByMicroseconds(250); - env.BlockUntilThreadsAsleep(2); // scheduling period = 2000 usec. - EXPECT_EQ(scheduled_items, 903); - feedback = -10000; // large feedback should hit min_scheduling_period. - env.AdvanceByMicroseconds(1999); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 903); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); // scheduling period = 200 usec. 
- EXPECT_EQ(scheduled_items, 904); - env.AdvanceByMicroseconds(200); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 905); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, FeedbackSmoothing) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - double feedback = 0; - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - options.scheduling_period_feedback = [&feedback] { return feedback; }; - options.feedback_smoothing_batches = 3; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - - TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 4 batches. - for (int i = 0; i < 4; i++) { - TF_ASSERT_OK(ScheduleTask(900 + i, queue.get())); - env.AdvanceByMicroseconds(1); - } - feedback = -300; - env.AdvanceByMicroseconds(996); - env.BlockUntilThreadsAsleep(2); - // ewma_feedback = 100, scheduling_period = 900. - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(899); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - // ewma_feedback = 167, scheduling_period = 750. 
- EXPECT_EQ(scheduled_items, 901); - env.AdvanceByMicroseconds(749); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 901); - feedback = 1000 / 3.; - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - // emwa_feedback = 0, scheduling_period = 750. - EXPECT_EQ(scheduled_items, 902); - env.AdvanceByMicroseconds(749); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 902); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 903); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, QueueCapacityInfo) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - AdaptiveSharedBatchScheduler::QueueOptions queue_options; - queue_options.max_batch_size = 10; - queue_options.max_enqueued_batches = 10; - TF_ASSERT_OK(scheduler->AddQueue(queue_options, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 3 tasks. 
- EXPECT_EQ(queue->NumEnqueuedTasks(), 0); - EXPECT_EQ(queue->SchedulingCapacity(), 100); - TF_ASSERT_OK(ScheduleTask(5, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 1); - EXPECT_EQ(queue->SchedulingCapacity(), 95); - env.AdvanceByMicroseconds(1); - TF_ASSERT_OK(ScheduleTask(6, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 2); - EXPECT_EQ(queue->SchedulingCapacity(), 84); - env.AdvanceByMicroseconds(1); - TF_ASSERT_OK(ScheduleTask(1, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 3); - EXPECT_EQ(queue->SchedulingCapacity(), 83); - - env.AdvanceByMicroseconds(998); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 5); - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 7); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesImplementation) { +TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimit) { AdaptiveSharedBatchScheduler::Options options; - options.use_in_flight_batches_implementation = true; options.initial_in_flight_batches_limit = 2; options.batches_to_average_over = 1000; mutex mu; @@ -476,7 +112,7 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesImplementation) { std::unique_ptr> queue; TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - // Enqueue 3 batches. + // Enqueue 3 tasks, should result in 3 batches. 
for (int i = 0; i < 3; i++) { TF_ASSERT_OK(ScheduleTask(100, queue.get())); } @@ -490,7 +126,6 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) { { AdaptiveSharedBatchScheduler::Options options; options.env = &env; - options.use_in_flight_batches_implementation = true; options.initial_in_flight_batches_limit = 2; options.batches_to_average_over = 1; auto queue_callback = [&env](std::unique_ptr> batch) { @@ -544,6 +179,125 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) { } stop_teardown.Notify(); } + +TEST(AdaptiveSharedBatchSchedulerTest, DeleteQueue) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + finish_processing.WaitForNotification(); + mu.lock(); + processed_batches++; + mu.unlock(); + }; + + std::unique_ptr queue_deleter; + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. + for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // Delete queue, should be kept alive until empty. + queue_deleter.reset(Env::Default()->StartThread( + {}, "QueueDeleterThread", [&queue, &mu, &processed_batches] { + queue.reset(); + mutex_lock l(mu); + EXPECT_EQ(processed_batches, 2); + })); + // Give queue_deleter thread time to delete queue. 
+ Env::Default()->SleepForMicroseconds(1000); + finish_processing.Notify(); +} + +TEST(AdaptiveSharedBatchSchedulerTest, DeleteScheduler) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + finish_processing.WaitForNotification(); + mu.lock(); + processed_batches++; + mu.unlock(); + }; + + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. + for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // Delete scheduler, should be kept alive until queues are empty. + scheduler.reset(); + finish_processing.Notify(); + while (true) { + mutex_lock l(mu); + if (processed_batches == 2) break; + } +} + +TEST(AdaptiveSharedBatchSchedulerTest, QueueCapacityInfo) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + mu.lock(); + int batch_num = ++processed_batches; + mu.unlock(); + if (batch_num == 1) { + finish_processing.WaitForNotification(); + } + }; + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. 
+ for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // First batch was immediately processed, no longer counts as enqueued. + EXPECT_EQ(queue->NumEnqueuedTasks(), 1); + EXPECT_EQ(queue->SchedulingCapacity(), 9 * 1000 + 900); + // Enqueue 2 more tasks, should fall in same batch. + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + TF_ASSERT_OK(ScheduleTask(200, queue.get())); + EXPECT_EQ(queue->NumEnqueuedTasks(), 3); + EXPECT_EQ(queue->SchedulingCapacity(), 9 * 1000 + 600); + // Enqueue 1 more task, should create new batch. + TF_ASSERT_OK(ScheduleTask(700, queue.get())); + EXPECT_EQ(queue->NumEnqueuedTasks(), 4); + EXPECT_EQ(queue->SchedulingCapacity(), 8 * 1000 + 300); + finish_processing.Notify(); +} } // namespace anonymous } // namespace serving } // namespace tensorflow -- GitLab From 4a9d929868c57d742512d65634cceada8c11c6ab Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 19:46:27 -0800 Subject: [PATCH 045/311] Make sure rounding and handling of denormals in Grappler is the same as in TensorFlow. Enable constant folding for more types, particularly on GPUs. 
PiperOrigin-RevId: 187120456 --- tensorflow/core/grappler/op_types.cc | 6 +- .../grappler/optimizers/constant_folding.cc | 96 ++++++++++++------- tensorflow/core/kernels/constant_op.cc | 11 +++ 3 files changed, 74 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index e225e99a9e..9b3755ddce 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -354,7 +354,8 @@ bool IsFreeOfSideEffect(const NodeDef& node) { return false; } const OpDef* op_def = nullptr; - Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); + const string& op_name = node.op(); + Status status = OpRegistry::Global()->LookUpOpDef(op_name, &op_def); if (!status.ok()) { return false; } @@ -368,7 +369,8 @@ bool IsFreeOfSideEffect(const NodeDef& node) { } } // Some nodes do in-place updates on regular tensor inputs. - if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace")) { + if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace") || + StringPiece(op_name).starts_with("Inplace")) { return false; } return true; diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 10ca7dcce0..a5417aaa51 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -35,7 +35,9 @@ limitations under the License. 
#include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/setround.h" #include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/bcast.h" @@ -51,7 +53,14 @@ class EigenThreadPoolWrapper : public Eigen::ThreadPoolInterface { explicit EigenThreadPoolWrapper(thread::ThreadPool* pool) : pool_(pool) {} ~EigenThreadPoolWrapper() override {} void Schedule(std::function fn) override { - pool_->Schedule(std::move(fn)); + auto wrapped = [=]() { + // TensorFlow flushes denormals to zero and rounds to nearest, so we do + // the same here. + port::ScopedFlushDenormal flush; + port::ScopedSetRound round(FE_TONEAREST); + fn(); + }; + pool_->Schedule(std::move(wrapped)); } int NumThreads() const override { return pool_->NumThreads(); } int CurrentThreadId() const override { return pool_->CurrentThreadId(); } @@ -292,16 +301,16 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // graph. const int node_count = graph_->node_size(); for (int i = 0; i < node_count; ++i) { - NodeDef& node = *graph_->mutable_node(i); - const string op = node.op(); + NodeDef* node = graph_->mutable_node(i); + const string op = node->op(); if (op != "Shape" && op != "Size" && op != "Rank" && op != "ShapeN") { continue; } const std::vector& output = - properties.GetOutputProperties(node.name()); + properties.GetOutputProperties(node->name()); const std::vector& input = - properties.GetInputProperties(node.name()); + properties.GetInputProperties(node->name()); if (input.empty() || output.empty()) { continue; } @@ -328,35 +337,35 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // could have multiple outputs). 
if (op == "Shape" || op == "Size" || op == "Rank") { // Replace the node with the corresponding constant. - node.set_op("Const"); - node.clear_attr(); - (*node.mutable_attr())["dtype"].set_type(type); + node->set_op("Const"); + node->clear_attr(); + (*node->mutable_attr())["dtype"].set_type(type); value.AsProtoTensorContent( - (*node.mutable_attr())["value"].mutable_tensor()); + (*node->mutable_attr())["value"].mutable_tensor()); // Turn the data input into a control dependency: this is needed to // ensure that the constant value will only be run in the // cases where the shape/rank/size would have been run in // the original graph. Additional inputs are extra control string ctrl_dep = - AddControlDependency(node.input(0), graph_, node_map_.get()); - node.set_input(0, ctrl_dep); - node_map_->AddOutput(NodeName(ctrl_dep), node.name()); + AddControlDependency(node->input(0), graph_, node_map_.get()); + node->set_input(0, ctrl_dep); + node_map_->AddOutput(NodeName(ctrl_dep), node->name()); } else { - auto outputs = node_map_->GetOutputs(node.name()); + auto outputs = node_map_->GetOutputs(node->name()); for (const auto& output : outputs) { for (int k = 0; k < output->input_size(); ++k) { int port; string node_name = ParseNodeName(output->input(k), &port); - if (node_name == node.name() && port == j) { + if (node_name == node->name() && port == j) { // Create a const node as ShapeN's output if not already. 
const string const_name = - OptimizedNodeName(node, strings::StrCat("-matshapes-", j)); + OptimizedNodeName(*node, strings::StrCat("-matshapes-", j)); if (node_map_->GetNode(const_name) == nullptr) { NodeDef* added_node = graph_->add_node(); added_node->set_name(const_name); added_node->set_op("Const"); - added_node->set_device(node.device()); + added_node->set_device(node->device()); node_map_->AddNode(added_node->name(), added_node); (*added_node->mutable_attr())["dtype"].set_type(type); value.AsProtoTensorContent( @@ -364,7 +373,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // We add a control dependency to the original ShapeN node, // so that the node will only be run if all inputs of the // original ShapeN node are run. - string ctrl_dep = AddControlDependency(node.name(), graph_, + string ctrl_dep = AddControlDependency(node->name(), graph_, node_map_.get()); *added_node->add_input() = ctrl_dep; node_map_->AddOutput(NodeName(ctrl_dep), added_node->name()); @@ -679,7 +688,7 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { nodes_whitelist_.find(node.name()) == nodes_whitelist_.end()) { return false; } - // Skip control flow nodes, they can't be folded + // Skip control flow nodes, they can't be folded. if (ModifiesFrameInfo(node)) { return false; } @@ -688,12 +697,16 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { return false; } - // Skips ops that don't benefit from folding. - const string& op = node.op(); + // Don't fold stateful ops such as TruncatedNormal. + if (!IsFreeOfSideEffect(node)) { + return false; + } - if (op.find("Placeholder") == 0) { + // Skips ops that don't benefit from folding. 
+ if (IsPlaceholder(node)) { return false; } + const string& op = node.op(); if (op.find("Save") != string::npos || op.find("Restore") != string::npos || op.find("Reader") != string::npos) { return false; @@ -705,16 +718,12 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { return false; } - // Don't fold stateful ops such as TruncatedNormal. const OpDef* op_def = nullptr; Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); if (!status.ok()) { return false; } - if (op_def->is_stateful()) { - return false; - } - + // Don't fold ops without outputs. if (op_def->output_arg_size() == 0) { return false; } @@ -779,8 +788,11 @@ Status CreateConstantTensorAttrValue(DataType type, double value, SET_TENSOR_VAL_CASE(DT_FLOAT, float, float); SET_TENSOR_VAL_CASE(DT_DOUBLE, double, double); SET_TENSOR_VAL_CASE(DT_INT64, int64, int64); + SET_TENSOR_VAL_CASE(DT_UINT64, int64, int64); SET_TENSOR_VAL_CASE(DT_INT32, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT32, int32, int); SET_TENSOR_VAL_CASE(DT_INT16, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT16, int32, int); SET_TENSOR_VAL_CASE(DT_INT8, int32, int); SET_TENSOR_VAL_CASE(DT_UINT8, int32, int); SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool); @@ -843,10 +855,16 @@ Status ConstantFolding::CreateNodeDef(const string& name, POPULATE_TENSOR_PROTO(tensor, t, double, double); case DT_INT64: POPULATE_TENSOR_PROTO(tensor, t, int64, int64); + case DT_UINT64: + POPULATE_TENSOR_PROTO(tensor, t, uint64, int64); case DT_INT32: POPULATE_TENSOR_PROTO(tensor, t, int32, int); + case DT_UINT32: + POPULATE_TENSOR_PROTO(tensor, t, uint32, int); case DT_INT16: POPULATE_TENSOR_PROTO(tensor, t, int16, int); + case DT_UINT16: + POPULATE_TENSOR_PROTO(tensor, t, uint16, int); case DT_INT8: POPULATE_TENSOR_PROTO(tensor, t, int8, int); case DT_UINT8: @@ -1166,9 +1184,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { std::unordered_set processed_nodes; std::deque queue; for (int i = 0; i < graph_->node_size(); i++) { - auto 
node = graph_->mutable_node(i); - if (IsFoldable(*node)) { - queue.push_back(node); + if (IsFoldable(graph_->node(i))) { + queue.push_back(graph_->mutable_node(i)); } } while (!queue.empty()) { @@ -1203,8 +1220,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { int last = output->node_size() - 1; for (int i = output->node_size() - 1; i >= 0; --i) { const NodeDef& node = output->node(i); - auto outputs = node_map_->GetOutputs(node.name()); - if (outputs.empty()) { + auto fanout = node_map_->GetOutputs(node.name()); + if (fanout.empty()) { output->mutable_node()->SwapElements(i, last); last--; } @@ -1216,8 +1233,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { // If no fetch nodes is provided, we conservatively // keep all nodes in the original graph in case users need to fetch // their values. - auto outputs = node_map_->GetOutputs(node.name()); - if (!outputs.empty() || !has_fetch_ || + auto fanout = node_map_->GetOutputs(node.name()); + if (!fanout.empty() || !has_fetch_ || nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { auto added_node = output->add_node(); *added_node = node; @@ -1331,14 +1348,14 @@ bool ConstantFolding::IsOnes(const NodeDef& node) const { // IS_ONES_CASE(DT_HALF); IS_ONES_CASE(DT_FLOAT); IS_ONES_CASE(DT_DOUBLE); + IS_ONES_CASE(DT_COMPLEX64); + IS_ONES_CASE(DT_COMPLEX128); IS_ONES_CASE(DT_UINT8); IS_ONES_CASE(DT_INT8); IS_ONES_CASE(DT_UINT16); IS_ONES_CASE(DT_INT16); IS_ONES_CASE(DT_INT32); IS_ONES_CASE(DT_INT64); - IS_ONES_CASE(DT_COMPLEX64); - IS_ONES_CASE(DT_COMPLEX128); default: VLOG(1) << "Unsupported type " << DataTypeString(dtype); return false; @@ -1362,14 +1379,14 @@ bool ConstantFolding::IsZeros(const NodeDef& node) const { // IS_ZEROS_CASE(DT_HALF); IS_ZEROS_CASE(DT_FLOAT); IS_ZEROS_CASE(DT_DOUBLE); + IS_ZEROS_CASE(DT_COMPLEX64); + IS_ZEROS_CASE(DT_COMPLEX128); IS_ZEROS_CASE(DT_UINT8); IS_ZEROS_CASE(DT_INT8); IS_ZEROS_CASE(DT_UINT16); IS_ZEROS_CASE(DT_INT16); IS_ZEROS_CASE(DT_INT32); 
IS_ZEROS_CASE(DT_INT64); - IS_ZEROS_CASE(DT_COMPLEX64); - IS_ZEROS_CASE(DT_COMPLEX128); default: VLOG(1) << "Unsupported type " << DataTypeString(dtype); return false; @@ -1869,6 +1886,11 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { + // TensorFlow flushes denormals to zero and rounds to nearest, so we do + // the same here. + port::ScopedFlushDenormal flush; + port::ScopedSetRound round(FE_TONEAREST); + nodes_to_preserve_ = item.NodesToPreserve(); for (const auto& feed : item.feed) { feed_nodes_.insert(NodeName(feed.first)); diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index fdb03a5aae..312c1a41d3 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -105,7 +105,12 @@ REGISTER_KERNEL(GPU, int8); REGISTER_KERNEL(GPU, qint8); REGISTER_KERNEL(GPU, uint16); REGISTER_KERNEL(GPU, int16); +REGISTER_KERNEL(GPU, qint16); +REGISTER_KERNEL(GPU, quint16); +REGISTER_KERNEL(GPU, uint32); +REGISTER_KERNEL(GPU, qint32); REGISTER_KERNEL(GPU, int64); +REGISTER_KERNEL(GPU, uint64); REGISTER_KERNEL(GPU, complex64); REGISTER_KERNEL(GPU, complex128); REGISTER_KERNEL(GPU, bool); @@ -122,9 +127,15 @@ REGISTER_SYCL_KERNEL(SYCL, float); REGISTER_SYCL_KERNEL(SYCL, double); REGISTER_SYCL_KERNEL(SYCL, uint8); REGISTER_SYCL_KERNEL(SYCL, int8); +REGISTER_SYCL_KERNEL(SYCL, qint8); REGISTER_SYCL_KERNEL(SYCL, uint16); REGISTER_SYCL_KERNEL(SYCL, int16); +REGISTER_SYCL_KERNEL(SYCL, qint16); +REGISTER_SYCL_KERNEL(SYCL, quint16); +REGISTER_SYCL_KERNEL(SYCL, uint32); +REGISTER_SYCL_KERNEL(SYCL, qint32); REGISTER_SYCL_KERNEL(SYCL, int64); +REGISTER_SYCL_KERNEL(SYCL, uint64); REGISTER_SYCL_KERNEL(SYCL, bool); #undef REGISTER_SYCL_KERNEL #endif -- GitLab From 4774889094d3f1787a38cfbeb0670cb4fb6e24ff Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 26 Feb 2018 19:57:42 -0800 Subject: 
[PATCH 046/311] Fixes and simplification in the Keras training engine. - Explicitly disallow sample/class weighting in eager (it was never supported) - Remove tests for it (which were actually ignoring sample/class weights) - Make sample weight placeholders placeholder_with_default, and do not create all-ones numpy arrays to feed them when no sample weights are provided (this might lead to better performance) PiperOrigin-RevId: 187121215 --- .../python/keras/_impl/keras/backend.py | 11 +- .../python/keras/_impl/keras/callbacks.py | 20 +- .../keras/_impl/keras/engine/training.py | 151 +++--- .../_impl/keras/engine/training_eager.py | 17 +- .../_impl/keras/engine/training_eager_test.py | 436 ------------------ .../keras/_impl/keras/engine/training_test.py | 8 - 6 files changed, 110 insertions(+), 533 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index a2db05f6cf..2b75666b9e 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -2749,7 +2749,7 @@ class Function(object): self.updates_op = control_flow_ops.group(*updates_ops) self.name = name # additional tensor substitutions - self.feed_dict = session_kwargs.pop('feed_dict', {}) + self.feed_dict = session_kwargs.pop('feed_dict', None) # additional operations self.fetches = session_kwargs.pop('fetches', []) if not isinstance(self.fetches, list): @@ -2759,8 +2759,15 @@ class Function(object): def __call__(self, inputs): if not isinstance(inputs, (list, tuple)): raise TypeError('`inputs` should be a list or tuple.') - feed_dict = self.feed_dict.copy() + + if self.feed_dict: + feed_dict = self.feed_dict.copy() + else: + feed_dict = {} + for tensor, value in zip(self.inputs, inputs): + if value is None: + continue if is_sparse(tensor): sparse_coo = value.tocoo() indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py 
b/tensorflow/python/keras/_impl/keras/callbacks.py index f6c4661425..deb1e8867d 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks.py +++ b/tensorflow/python/keras/_impl/keras/callbacks.py @@ -778,16 +778,24 @@ class TensorBoard(Callback): while i < val_size: step = min(self.batch_size, val_size - i) batch_val = [] - batch_val.append(val_data[0][i:i + step]) - batch_val.append(val_data[1][i:i + step]) - batch_val.append(val_data[2][i:i + step]) + batch_val.append(val_data[0][i:i + step] + if val_data[0] is not None else None) + batch_val.append(val_data[1][i:i + step] + if val_data[1] is not None else None) + batch_val.append(val_data[2][i:i + step] + if val_data[2] is not None else None) if self.model.uses_learning_phase: # do not slice the learning phase - batch_val = [x[i:i + step] for x in val_data[:-1]] + batch_val = [x[i:i + step] if x is not None else None + for x in val_data[:-1]] batch_val.append(val_data[-1]) else: - batch_val = [x[i:i + step] for x in val_data] - feed_dict = dict(zip(tensors, batch_val)) + batch_val = [x[i:i + step] if x is not None else None + for x in val_data] + feed_dict = {} + for key, val in zip(tensors, batch_val): + if val is not None: + feed_dict[key] = val result = self.sess.run([self.merged], feed_dict=feed_dict) summary_str = result[0] self.writer.add_summary(summary_str, epoch) diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 57451ad470..63bea08ac5 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -40,6 +40,7 @@ from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.layers.base import _DeferredTensor +from tensorflow.python.ops import array_ops from 
tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import optimizer as tf_optimizer_module from tensorflow.python.util.tf_export import tf_export @@ -225,9 +226,9 @@ def _check_array_lengths(inputs, targets, weights=None): # return a set with the variation between # different shapes, with None => 0 if x is None: - return {0} + return {} else: - return set([0 if y is None else y.shape[0] for y in x]) + return set([y.shape[0] for y in x if y is not None]) set_x = set_of_lengths(inputs) set_y = set_of_lengths(targets) @@ -259,7 +260,8 @@ def _check_array_lengths(inputs, targets, weights=None): def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): """Does validation on the compatibility of targets and loss functions. - This helps prevent users from using loss functions incorrectly. + This helps prevent users from using loss functions incorrectly. This check + is purely for UX purposes. Arguments: targets: list of Numpy arrays of targets. 
@@ -275,7 +277,7 @@ def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): losses.categorical_crossentropy } for y, loss, shape in zip(targets, loss_fns, output_shapes): - if y is None or loss is None: + if y is None or loss is None or tensor_util.is_tensor(y): continue if loss is losses.categorical_crossentropy: if y.shape[-1] == 1: @@ -507,10 +509,7 @@ def _standardize_weights(y, (existing_classes - existing_class_weight)) return weights else: - if sample_weight_mode is None: - return np.ones((y.shape[0],), dtype=K.floatx()) - else: - return np.ones((y.shape[0], y.shape[1]), dtype=K.floatx()) + return None @tf_export('keras.models.Model', 'keras.Model') @@ -862,12 +861,12 @@ class Model(Network): sample_weights.append(None) else: if sample_weight_mode == 'temporal': - sample_weights.append( - K.placeholder(ndim=2, name=name + '_sample_weights')) + sample_weights.append(array_ops.placeholder_with_default( + [[1.]], shape=[None, None], name=name + '_sample_weights')) sample_weight_modes.append('temporal') else: - sample_weights.append( - K.placeholder(ndim=1, name=name + '_sample_weights')) + sample_weights.append(array_ops.placeholder_with_default( + [1.], shape=[None], name=name + '_sample_weights')) sample_weight_modes.append(None) self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] @@ -1314,7 +1313,7 @@ class Model(Network): for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] try: - if isinstance(ins[-1], float): + if isinstance(ins[-1], int): # Do not slice the training phase flag. 
ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -1424,7 +1423,7 @@ class Model(Network): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if ins and isinstance(ins[-1], float): + if ins and isinstance(ins[-1], int): # Do not slice the training phase flag. ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -1518,7 +1517,7 @@ class Model(Network): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if isinstance(ins[-1], float): + if isinstance(ins[-1], int): # Do not slice the training phase flag. ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -2070,10 +2069,6 @@ class Model(Network): val_y, sample_weight=val_sample_weight, batch_size=batch_size) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = val_x + val_y + val_sample_weights + [0.] - else: - val_ins = val_x + val_y + val_sample_weights elif validation_split and 0. < validation_split < 1.: do_validation = True @@ -2085,36 +2080,34 @@ class Model(Network): y, val_y = (slice_arrays(y, 0, split_at), slice_arrays(y, split_at)) sample_weights, val_sample_weights = (slice_arrays( sample_weights, 0, split_at), slice_arrays(sample_weights, split_at)) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = val_x + val_y + val_sample_weights + [0.] - else: - val_ins = val_x + val_y + val_sample_weights - elif validation_steps: + val_x = [] + val_y = [] + val_sample_weights = [] do_validation = True - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = [0.] - - # Prepare input arrays and training function. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [1.] 
- else: - ins = x + y + sample_weights # Prepare display labels. out_labels = self.metrics_names if context.in_eager_mode(): + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + if do_validation: + if any([w is not None for w in val_sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported' + ' when eager execution is enabled, for now.') callback_metrics = copy.copy(out_labels) + [ 'val_' + n for n in out_labels ] + val_ins = val_x + val_y else: callback_metrics = copy.copy(out_labels) return training_eager.fit_loop( self, - ins, + x + y, out_labels=out_labels, batch_size=batch_size, epochs=epochs, @@ -2127,18 +2120,25 @@ class Model(Network): steps_per_epoch=steps_per_epoch, validation_steps=validation_steps) else: + # Prepare input arrays and training function. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [1] + else: + ins = x + y + sample_weights + self._make_train_function() f = self.train_function if do_validation: - if context.in_graph_mode(): - self._make_test_function() - val_f = self.test_function - else: - val_f = None + self._make_test_function() + val_f = self.test_function callback_metrics = copy.copy(out_labels) + [ 'val_' + n for n in out_labels ] + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + val_ins = val_x + val_y + val_sample_weights + [0] + else: + val_ins = val_x + val_y + val_sample_weights else: val_f = None callback_metrics = copy.copy(out_labels) @@ -2229,16 +2229,20 @@ class Model(Network): y, sample_weight=sample_weight, batch_size=batch_size) - # Prepare inputs, delegate logic to `_test_loop`. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [0.] 
- else: - ins = x + y + sample_weights if context.in_eager_mode(): + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') return training_eager.test_loop( - self, ins, batch_size=batch_size, verbose=verbose, steps=steps) + self, x + y, batch_size=batch_size, verbose=verbose, steps=steps) else: + # Prepare inputs, delegate logic to `_test_loop`. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [0] + else: + ins = x + y + sample_weights + self._make_test_function() f = self.test_function return self._test_loop( @@ -2276,16 +2280,16 @@ class Model(Network): 'argument.') x, _, _ = self._standardize_user_data(x) - # Prepare inputs, delegate logic to `_predict_loop`. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + [0.] - else: - ins = x - if context.in_eager_mode(): return training_eager.predict_loop( - self, ins, batch_size=batch_size, verbose=verbose, steps=steps) + self, x, batch_size=batch_size, verbose=verbose, steps=steps) else: + # Prepare inputs, delegate logic to `_predict_loop`. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + [0] + else: + ins = x + self._make_predict_function() f = self.predict_function @@ -2327,20 +2331,26 @@ class Model(Network): and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. + Raises: + ValueError: In case of invalid user-provided arguments. """ x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight, class_weight=class_weight) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [1.] 
- else: - ins = x + y + sample_weights if context.in_eager_mode(): - outputs = training_eager.train_on_batch(self, ins) + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + outputs = training_eager.train_on_batch(self, x + y) else: + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [1] + else: + ins = x + y + sample_weights + self._make_train_function() outputs = self.train_function(ins) @@ -2377,18 +2387,21 @@ class Model(Network): the display labels for the scalar outputs. Raises: - ValueError: in case of invalid arguments. + ValueError: In case of invalid user-provided arguments. """ x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [0.] - else: - ins = x + y + sample_weights if context.in_eager_mode(): - outputs = training_eager.test_on_batch(self, ins) + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + outputs = training_eager.test_on_batch(self, x + y) else: + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [0] + else: + ins = x + y + sample_weights self._make_test_function() outputs = self.test_function(ins) @@ -2408,14 +2421,9 @@ class Model(Network): """ x, _, _ = self._standardize_user_data(x) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + [0.] 
- else: - ins = x - if context.in_eager_mode(): ins_batch_converted = [] - for ib in ins: + for ib in x: ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) eager_model_inputs = [] @@ -2426,6 +2434,11 @@ class Model(Network): return outs if context.in_graph_mode(): + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + [0] + else: + ins = x + self._make_predict_function() outputs = self.predict_function(ins) if len(outputs) == 1: @@ -2643,7 +2656,7 @@ class Model(Network): val_data = val_x + val_y + val_sample_weights if self.uses_learning_phase and not isinstance( K.learning_phase(), int): - val_data += [0.] + val_data += [0] for cbk in callbacks: cbk.validation_data = val_data diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py index 282dd0dc0d..cdf189adef 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py @@ -139,6 +139,8 @@ def _model_loss(model, inputs, targets, training=False): model.output_names[i]) loss_metrics.append(K.mean(output_loss)) + # TODO(fchollet): support masking; in practice `_keras_mask` is never + # set in this context currently. mask = outs[i]._keras_mask # adapted from weighted_loss_fn if mask is not None: @@ -148,17 +150,7 @@ def _model_loss(model, inputs, targets, training=False): # to the number of unmasked samples. 
output_loss /= K.mean(mask) - # adapted from weighted_loss_fn - # apply sample weighting - if model.sample_weights: - # reduce score_array to same ndim as weight array - ndim = K.ndim(output_loss) - weight_ndim = K.ndim(model.sample_weights) - output_loss = K.mean(output_loss, axis=list(range(weight_ndim, ndim))) - output_loss *= model.sample_weights - output_loss /= K.mean(K.cast(K.not_equal(model.sample_weights, 0), - K.floatx())) - output_loss = K.mean(output_loss) + # TODO(fchollet): support sample weighting loss_weight = model.loss_weights_list[i] if total_loss is None: @@ -231,7 +223,8 @@ def train_on_batch(model, ins): """ ins_batch_converted = [] for ib in ins: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) + if ib is not None: + ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) eager_model_inputs = [] eager_model_outputs = [] for i in range(len(model.inputs)): diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py index 3d94b7537f..550b86a71d 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py @@ -24,9 +24,7 @@ import numpy as np from tensorflow.python.framework import ops from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils -from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.rmsprop import RMSPropOptimizer @@ -311,440 +309,6 @@ class TrainingTest(test.TestCase): optimizer='rms') -class LossWeightingTest(test.TestCase): - - def test_class_weights(self): - num_classes = 5 - batch_size = 5 - epochs = 5 - weighted_class = 3 - train_samples = 3000 - test_samples = 3000 - input_dim = 5 - - model = 
keras.models.Sequential() - model.add(keras.layers.Dense(10, input_shape=(input_dim,))) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(num_classes)) - model.add(keras.layers.Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_test = y_test.copy() - int_y_train = y_train.copy() - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - y_test = keras.utils.to_categorical(y_test, num_classes) - test_ids = np.where(int_y_test == np.array(weighted_class))[0] - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - class_weight=class_weight, - validation_data=(x_train, y_train, sample_weight)) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 2, - verbose=0, - class_weight=class_weight) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 2, - verbose=0, - class_weight=class_weight, - validation_split=0.1) - - model.train_on_batch( - x_train[:batch_size], y_train[:batch_size], class_weight=class_weight) - ref_score = model.evaluate(x_test, y_test, verbose=0) - score = model.evaluate( - x_test[test_ids, :], y_test[test_ids, :], verbose=0) - self.assertLess(score, ref_score) - - def test_sample_weights(self): - num_classes = 5 - batch_size = 5 - epochs = 5 - weighted_class = 3 - train_samples = 3000 - test_samples = 3000 - input_dim = 5 - - model = keras.models.Sequential() - model.add(keras.layers.Dense(10, input_shape=(input_dim,))) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(num_classes)) - model.add(keras.layers.Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - np.random.seed(43) - (x_train, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_train = y_train.copy() - y_train = keras.utils.to_categorical(y_train, num_classes) - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - sample_weight=sample_weight) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - sample_weight=sample_weight, - validation_split=0.1) - model.train_on_batch( - x_train[:batch_size], - y_train[:batch_size], - sample_weight=sample_weight[:batch_size]) - model.test_on_batch( - x_train[:batch_size], - y_train[:batch_size], - sample_weight=sample_weight[:batch_size]) - - def test_temporal_sample_weights(self): - num_classes = 5 - weighted_class = 3 - train_samples = 1000 - test_samples = 1000 - input_dim = 5 - timesteps = 3 - - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(num_classes), - input_shape=(timesteps, input_dim))) - model.add(keras.layers.Activation('softmax')) - - np.random.seed(1337) - (_, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_train = y_train.copy() - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- with self.assertRaises(ValueError): - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - sample_weight_mode='temporal') - - def test_class_weight_invalid_use_case(self): - num_classes = 5 - train_samples = 1000 - test_samples = 1000 - input_dim = 5 - timesteps = 3 - - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(num_classes), - input_shape=(timesteps, input_dim))) - model.add(keras.layers.Activation('softmax')) - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - (x_train, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - class_weight = dict([(i, 1.) for i in range(num_classes)]) - - del class_weight[1] - with self.assertRaises(ValueError): - model.fit(x_train, y_train, - epochs=0, verbose=0, class_weight=class_weight) - - with self.assertRaises(ValueError): - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - sample_weight_mode=[]) - - # Build multi-output model - x = keras.Input((3,)) - y1 = keras.layers.Dense(4, name='1')(x) - y2 = keras.layers.Dense(4, name='2')(x) - model = keras.models.Model(x, [y1, y2]) - model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001), loss='mse') - x_np = np.random.random((10, 3)) - y_np = np.random.random((10, 4)) - w_np = np.random.random((10,)) - # This will work - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': w_np}) - # These will not - with self.assertRaises(ValueError): - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=[w_np]) - with self.assertRaises(TypeError): - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=w_np) - with self.assertRaises(ValueError): - bad_w_np = 
np.random.random((11,)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - with self.assertRaises(ValueError): - bad_w_np = np.random.random((10, 2)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - with self.assertRaises(ValueError): - bad_w_np = np.random.random((10, 2, 2)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - - -class TestDynamicTrainability(test.TestCase): - - def test_trainable_warning(self): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=3)) - model.trainable = False - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - model.trainable = True - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - - def test_trainable_argument(self): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=3, trainable=False)) - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - out = model.predict(x) - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - out_2 = model.predict(x) - self.assertAllClose(out, out_2) - - # test with nesting - inputs = keras.layers.Input(shape=(3,)) - output = model(inputs) - model = keras.models.Model(inputs, output) - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - out = model.predict(x) - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - out_2 = model.predict(x) - self.assertAllClose(out, out_2) - - def test_layer_trainability_switch(self): - # with constructor argument, in 
Sequential - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, trainable=False, input_dim=1)) - self.assertListEqual(model.trainable_weights, []) - - # by setting the `trainable` argument, in Sequential - model = keras.models.Sequential() - layer = keras.layers.Dense(2, input_dim=1) - model.add(layer) - self.assertListEqual(model.trainable_weights, layer.trainable_weights) - layer.trainable = False - self.assertListEqual(model.trainable_weights, []) - - # with constructor argument, in Model - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2, trainable=False)(x) - model = keras.models.Model(x, y) - self.assertListEqual(model.trainable_weights, []) - - # by setting the `trainable` argument, in Model - x = keras.layers.Input(shape=(1,)) - layer = keras.layers.Dense(2) - y = layer(x) - model = keras.models.Model(x, y) - self.assertListEqual(model.trainable_weights, layer.trainable_weights) - layer.trainable = False - self.assertListEqual(model.trainable_weights, []) - - def test_model_trainability_switch(self): - # a non-trainable model has no trainable weights - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - model = keras.models.Model(x, y) - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - - # same for Sequential - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=1)) - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - - def test_nested_model_trainability(self): - - # a Sequential inside a Model - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(2, input_dim=1)) - - x = keras.layers.Input(shape=(1,)) - y = inner_model(x) - outer_model = keras.models.Model(x, y) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = 
False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Sequential inside a Sequential - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(2, input_dim=1)) - outer_model = keras.models.Sequential() - outer_model.add(inner_model) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Model inside a Model - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - inner_model = keras.models.Model(x, y) - x = keras.layers.Input(shape=(1,)) - y = inner_model(x) - outer_model = keras.models.Model(x, y) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Model inside a Sequential - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - inner_model = keras.models.Model(x, y) - outer_model = keras.models.Sequential() - outer_model.add(inner_model) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - -class TestTrainingUtils(test.TestCase): - - def test_check_array_lengths(self): - keras.engine.training._check_array_lengths(None, None, None) - a_np = np.random.random((4, 3, 3)) - keras.engine.training._check_array_lengths(a_np, a_np, a_np) - keras.engine.training._check_array_lengths( - [a_np, a_np], [a_np, a_np], [a_np, a_np]) - 
keras.engine.training._check_array_lengths([None], [None], [None]) - - b_np = np.random.random((3, 4)) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, None, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, a_np, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [None], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [b_np], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], None, [b_np]) - - def test_slice_arrays(self): - input_a = np.random.random((10, 3)) - slice_arrays(None) - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = [None, [1, 1], None, [1, 1]] - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = [None] - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = None - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - - def test_fit_with_BatchNorm(self): - model = keras.models.Sequential() - model.add(keras.layers.Dense(10, input_dim=4)) - model.add(keras.layers.BatchNormalization()) - model.add(keras.layers.Activation('tanh')) - model.add(keras.layers.Dropout(0.2)) - - input_a_np = np.random.random((10, 4)) - output_b_np = np.random.random((10, 10)) - - model.compile(loss='binary_crossentropy', optimizer=RMSPropOptimizer(0.001)) - model.fit(input_a_np, output_b_np, epochs=1, batch_size=5, verbose=0) - - def test_fit_with_regularization(self): - model = keras.models.Sequential() - with self.assertRaises(ValueError): - model.add( - keras.layers.Dense(4, input_dim=3, - kernel_regularizer=keras.regularizers.l2(0.01), - activity_regularizer=keras.regularizers.l1(0.01))) - - if __name__ == '__main__': # Bazel sets these environment variables to very long 
paths. # Tempfile uses them to create long paths, and in turn multiprocessing diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index 9651eb9f14..6ca5941e9a 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -1045,16 +1045,8 @@ class TestTrainingUtils(test.TestCase): keras.engine.training._check_array_lengths([None], [None], [None]) b_np = np.random.random((3, 4)) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, None, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, a_np, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [None], None) with self.assertRaises(ValueError): keras.engine.training._check_array_lengths([a_np], [b_np], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], None, [b_np]) def test_slice_arrays(self): input_a = np.random.random((10, 3)) -- GitLab From 6825af46c53e6ad0b1260e5a96a4ef46b7703e46 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 19:58:18 -0800 Subject: [PATCH 047/311] Fix bug in deserializing CondContexts. 
PiperOrigin-RevId: 187121244 --- tensorflow/python/ops/control_flow_ops.py | 11 ++++- tensorflow/python/training/saver_test.py | 49 ++++++++++++++++------- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 85944efbe8..fb9e2188d7 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1718,8 +1718,15 @@ class CondContext(ControlFlowContext): self._pivot = g.as_graph_element( ops.prepend_name_scope(context_def.pivot_name, import_scope)) self._branch = context_def.branch - super(CondContext, self).__init__( - values_def=context_def.values_def, import_scope=import_scope) + super(CondContext, self).__init__(values_def=context_def.values_def, + import_scope=import_scope) + # The predicate and pivot ops appear in self._values, but don't have self + # set as their control context. The __init__ call above will set self for + # all values, so manually override the predicate and pivot contexts here. + # pylint: disable=protected-access + self._pred.op._set_control_flow_context(self.outer_context) + self._pivot.op._set_control_flow_context(self.outer_context) + # pylint: enable=protected-access @property def pred(self): diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index b366ed30f3..b758ceaab0 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -2041,29 +2041,24 @@ class MetaGraphTest(test.TestCase): self._testGraphExtensionRestore(test_dir) self._testRestoreFromTrainGraphWithControlContext(test_dir) - def testNestedWhileLoops(self): - test_dir = self._get_test_dir("nested_whiles") + def _testWhileLoopAndGradientSerDes(self, outer_body_fn): + # Build a while loop with `outer_body_fn`, export it, and verify that it can + # be imported and the gradient can be built and run correctly. 
+ + test_dir = self._get_test_dir("nested_control_flow") filename = os.path.join(test_dir, "metafile") saver_ckpt = os.path.join(test_dir, "saver.ckpt") - # Create two simple nested while loops. + # Create while loop using `outer_body_fn`. with ops_lib.Graph().as_default(): - def body(i, x): - _, r = control_flow_ops.while_loop(lambda j, y: j < 3, - lambda j, y: (j + 1, y + x), - [0, 0]) - return i + 1, x + r - var = variables.Variable(0) var_name = var.name - - _, output = control_flow_ops.while_loop(lambda i, x: i < 5, body, + _, output = control_flow_ops.while_loop(lambda i, x: i < 5, outer_body_fn, [0, var]) output_name = output.name - init_op = variables.global_variables_initializer() - # Generate a MetaGraphDef containing the nested loops. + # Generate a MetaGraphDef containing the while loop. with session.Session() as sess: sess.run(init_op) sess.run(output) @@ -2071,8 +2066,8 @@ class MetaGraphTest(test.TestCase): saver.save(sess, saver_ckpt) saver.export_meta_graph(filename) - # Build and run the gradients of the nested while loop. We use this below - # to verify that the gradients are correct with an imported MetaGraphDef. + # Build and run the gradients of the while loop. We use this below to + # verify that the gradients are correct with an imported MetaGraphDef. grad = gradients_impl.gradients([output], [var]) with session.Session() as sess: sess.run(init_op) @@ -2096,6 +2091,30 @@ class MetaGraphTest(test.TestCase): actual_grad_value = sess.run(grad) self.assertEqual(expected_grad_value, actual_grad_value) + def testNestedWhileLoopsSerDes(self): + # Test two simple nested while loops. + def body(i, x): + _, r = control_flow_ops.while_loop(lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0]) + return i + 1, x + r + self._testWhileLoopAndGradientSerDes(body) + + def testNestedControlFlowSerDes(self): + # Test while loop in a cond in a while loop. 
+ # pylint: disable=g-long-lambda + def body(i, x): + cond_result = control_flow_ops.cond( + i > 0, + lambda: control_flow_ops.while_loop( + lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0])[1], + lambda: x) + return i + 1, cond_result + # pylint: enable=g-long-lambda + self._testWhileLoopAndGradientSerDes(body) + def testStrippedOpListDef(self): with self.test_session(): # Creates a graph. -- GitLab From bac2cb076281a90902609cea5ee2b28c5d821657 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 20:21:07 -0800 Subject: [PATCH 048/311] Add helpers to stream data from the GCE VM to a Cloud TPU. PiperOrigin-RevId: 187122870 --- tensorflow/contrib/tpu/BUILD | 28 +++ tensorflow/contrib/tpu/python/tpu/datasets.py | 192 ++++++++++++++++++ .../contrib/tpu/python/tpu/datasets_test.py | 181 +++++++++++++++++ 3 files changed, 401 insertions(+) create mode 100644 tensorflow/contrib/tpu/python/tpu/datasets.py create mode 100644 tensorflow/contrib/tpu/python/tpu/datasets_test.py diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index c48e84ddfa..095b4821f1 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -163,6 +163,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":datasets", ":profiler", ":tpu_py", "//tensorflow/contrib/tpu/proto:topology_proto_py", @@ -181,6 +182,33 @@ py_library( ], ) +py_library( + name = "datasets", + srcs = [ + "python/tpu/datasets.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:function", + "//tensorflow/python:functional_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/ops:readers", + ], +) + +tf_py_test( + name = "datasets_test", + srcs = ["python/tpu/datasets_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + ":datasets", + ], + grpc_enabled = 
True, +) + tf_py_test( name = "tpu_test", size = "small", diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py new file mode 100644 index 0000000000..29aea98542 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -0,0 +1,192 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ====================================== +"""Library of Cloud TPU helper functions for data loading.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import interleave_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.ops import readers +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.framework import ops +from tensorflow.python.ops import functional_ops + + +def _TextLineDataset(filename): + buffer_size = 8 * 1024 * 1024 # 8 MiB per file + dataset = readers.TextLineDataset(filename, buffer_size=buffer_size) + return dataset + + +def _TFRecordDataset(filename): + buffer_size = 8 * 1024 * 1024 # 8 MiB per file + dataset = readers.TFRecordDataset(filename, buffer_size=buffer_size) + return dataset + + +_FILETYPE_MAP = { + 'tfrecord': 
_TFRecordDataset, + 'textline': _TextLineDataset, + 'text': _TextLineDataset, +} + + +def StreamingFilesDataset(files, + filetype=None, + file_reader_job=None, + worker_job=None, + num_epochs=None, + filename_shuffle_buffer_size=None, + num_parallel_reads=None, + batch_transfer_size=None, + sloppy=None): + """StreamingFilesDataset constructs a dataset to stream from workers (GCE VM). + + Because Cloud TPUs are allocated over the network, a Cloud TPU cannot read + files local to your GCE VM. In order to train using files stored on your local + VM (e.g. on local SSD for extreme performance), use the StreamingFilesDataset + helper to generate a dataset to feed your Cloud TPU with files from your GCE + VM. + + The resulting dataset may return an OutOfRangeError if there are no files + found as a result of the fileglob expansion. + + Note: StreamingFilesDataset assumes that the session is using a + TPUClusterResolver and has therefore a worker and a coordinator job. File + loading will be done on the coordinator job. + + Args: + files: A string glob to match files, or a `tf.data.Dataset` generating file + names. + filetype: A string (one of 'tfrecord', or 'textline') or a single-argument + TensorFlow function that when given a filename returns a dataset. + file_reader_job: An optional string that corresponds to the job that should + perform the file reads. + worker_job: An optional string that corresponds to the job that should + process the tensors (i.e. your GPU or TPU worker). + num_epochs: The number of epochs through the training set that should be + generated. By default, it will repeat infinitely. + filename_shuffle_buffer_size: An optional integer whose value controls the + shuffling of the file names. If you would like to read from the files in + the same order, set to 0 or False. + num_parallel_reads: An optional integer controlling the number of files to + read from concurrently. (Set to 1 for no parallelism.) 
+ batch_transfer_size: An optional integer controlling the batching used to + amortize the remote function invocation overhead. Set to a very large + number to increase throughput. Set to a very small number to reduce memory + consumption. Set to False to skip batching. + sloppy: (Optional.) If `True`, read input data as fast as possible, without + maintaining a deterministic order. Defaults to `False`. + Returns: + A `tf.data.Dataset` with an infinite stream of elements generated by a + parallel interleaving of the set of files matched (or generated) by `files` + with a type is the output of the dataset specified by `filetype`. + + Raises: + ValueError: if any argument is not of the expected type. + """ + if filetype is None: + filetype = 'tfrecord' + + if isinstance(filetype, str): + if filetype not in _FILETYPE_MAP: + raise ValueError('Unexpected filetype: %s' % filetype) + reader_fn = _FILETYPE_MAP[filetype] + elif callable(filetype): + reader_fn = filetype + else: + raise ValueError('filetype should be a string or a callable') + + file_reader_job = file_reader_job or 'coordinator' + + worker_job = worker_job or 'worker' + + if filename_shuffle_buffer_size is None: + filename_shuffle_buffer_size = 4096 + + num_parallel_reads = num_parallel_reads or 8 + + if batch_transfer_size is None: + batch_transfer_size = 1024 + + if sloppy is None: + sloppy = False + + with ops.device('/job:%s' % file_reader_job): + if isinstance(files, str): + source_dataset = dataset_ops.Dataset.list_files(files) + elif isinstance(files, dataset_ops.Dataset): + source_dataset = files + else: + raise ValueError('files was not a string or a dataset: %s' % files) + + if filename_shuffle_buffer_size: + source_dataset = source_dataset.shuffle( + buffer_size=filename_shuffle_buffer_size) + + # NOTE: We perform the `repeat` on the source dataset, because the output + # dataset does not currently have enough information to recreate an iterator + # over the source dataset when it reaches the end. 
+ source_dataset = source_dataset.repeat(num_epochs) + + source_dataset = source_dataset.apply( + interleave_ops.parallel_interleave( + reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy)) + + if batch_transfer_size: + # Note: we can safely call batch_and_drop_remainder because we have an + # infinite stream of TFRecords. + source_dataset = source_dataset.apply( + batching.batch_and_drop_remainder(batch_transfer_size)) + + source_dataset = source_dataset.prefetch(1) + + source_iterator = source_dataset.make_one_shot_iterator() + source_handle = source_iterator.string_handle() + + @function.Defun(dtypes.string) + def LoadingFunc(h): + remote_iterator = iterator_ops.Iterator.from_string_handle( + h, source_dataset.output_types, source_dataset.output_shapes) + return remote_iterator.get_next() + + def MapFn(unused_input): + return functional_ops.remote_call( + args=[source_handle], + Tout=[dtypes.string], + f=LoadingFunc, + target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job) + + with ops.device('/job:%s' % worker_job): + # TODO(saeta,mrry): Switch to using _GeneratorDataset. + + # identity = lambda x: x + # dummy = constant_op.constant(0) + # output_dataset = dataset_ops._GeneratorDataset(dummy, identity, MapFn, + # identity) + + output_dataset = dataset_ops.Dataset.range(2).repeat().map(MapFn) + output_dataset = output_dataset.prefetch(1) + + if batch_transfer_size: + # Undo the batching used during the transfer. + output_dataset = output_dataset.apply(batching.unbatch()).prefetch(1) + + return output_dataset diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py new file mode 100644 index 0000000000..2c40797792 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -0,0 +1,181 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TPU datasets tests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.contrib.tpu.python.tpu import datasets +from tensorflow.core.protobuf import cluster_pb2 +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.client import session +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import readers +from tensorflow.python.lib.io import python_io +from tensorflow.python.platform import test +from tensorflow.python.training import server_lib +from tensorflow.python.util import compat + +_NUM_FILES = 10 +_NUM_ENTRIES = 200 + + +class DatasetsTest(test.TestCase): + + def setUp(self): + super(DatasetsTest, self).setUp() + self._coord = server_lib.Server.create_local_server() + self._worker = server_lib.Server.create_local_server() + + self._cluster_def = cluster_pb2.ClusterDef() + worker_job = self._cluster_def.job.add() + worker_job.name = 'worker' + worker_job.tasks[0] = self._worker.target[len('grpc://'):] + coord_job = self._cluster_def.job.add() + coord_job.name = 'coordinator' + coord_job.tasks[0] = self._coord.target[len('grpc://'):] + + session_config = config_pb2.ConfigProto(cluster_def=self._cluster_def) + + self._sess = session.Session(self._worker.target, config=session_config) + 
+ def testTextLineDataset(self): + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'text_line.%d.txt' % i) + contents = [] + for j in range(_NUM_ENTRIES): + contents.append(compat.as_bytes('%d: %d' % (i, j))) + with open(filename, 'wb') as f: + f.write(b'\n'.join(contents)) + all_contents.extend(contents) + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'text_line.*.txt'), filetype='text') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testTFRecordDataset(self): + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i) + writer = python_io.TFRecordWriter(filename) + for j in range(_NUM_ENTRIES): + record = compat.as_bytes('Record %d of file %d' % (j, i)) + writer.write(record) + all_contents.append(record) + writer.close() + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'tf_record*'), filetype='tfrecord') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testTFRecordDatasetFromDataset(self): + filenames = [] + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i) + filenames.append(filename) + writer = python_io.TFRecordWriter(filename) + for j in range(_NUM_ENTRIES): + record = compat.as_bytes('Record %d of file %d' % (j, i)) + writer.write(record) + all_contents.append(record) 
+ writer.close() + + filenames = dataset_ops.Dataset.from_tensor_slices(filenames) + + dataset = datasets.StreamingFilesDataset(filenames, filetype='tfrecord') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testArbitraryReaderFunc(self): + + def MakeRecord(i, j): + return compat.as_bytes('%04d-%04d' % (i, j)) + + record_bytes = len(MakeRecord(10, 200)) + + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'fixed_length.%d' % i) + with open(filename, 'wb') as f: + for j in range(_NUM_ENTRIES): + record = MakeRecord(i, j) + f.write(record) + all_contents.append(record) + + def FixedLengthFile(filename): + return readers.FixedLengthRecordDataset(filename, record_bytes) + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'fixed_length*'), + filetype=FixedLengthFile) + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testUnexpectedFiletypeString(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), '*'), filetype='foo') + + def testUnexpectedFiletypeType(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), '*'), filetype=3) + + def testUnexpectedFilesType(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset(123, filetype='tfrecord') + + +if __name__ == '__main__': + test.main() -- GitLab From 
50daa198f85f21f3295dd6e1ad2951f38cc6c825 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 21:09:30 -0800 Subject: [PATCH 049/311] Automated g4 rollback of changelist 187092622 PiperOrigin-RevId: 187125995 --- tensorflow/c/eager/BUILD | 1 - tensorflow/c/eager/c_api.cc | 4 ++-- tensorflow/c/eager/c_api_internal.h | 14 +------------- tensorflow/c/eager/runtime.cc | 14 ++++---------- tensorflow/c/eager/runtime.h | 3 --- tensorflow/c/eager/runtime_test.cc | 12 ++++++------ 6 files changed, 13 insertions(+), 35 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 16a2a15072..e55cb672e9 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -21,7 +21,6 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:lib", "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index b233dd5b93..bebb63c746 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -818,8 +818,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // See WARNING comment below - would be nice to rework to avoid this // subtlety. tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = tensorflow::KernelAndDevice::Init( - ndef, ctx->func_lib(device), &ctx->runner, kernel); + status->status = + tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); if (!status->status.ok()) { delete kernel; return; diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 29944df4c2..3356054cd0 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -31,7 +31,6 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" -#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/platform/mutex.h" @@ -46,15 +45,7 @@ struct TFE_ContextOptions { struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) - : thread_pool(new tensorflow::thread::ThreadPool( - opts.session_options.options.env, "EagerCompute", - opts.session_options.options.config - .inter_op_parallelism_threads() != 0 - ? opts.session_options.options.config - .inter_op_parallelism_threads() - : tensorflow::port::NumSchedulableCPUs())), - runner([this](std::function f) { thread_pool->Schedule(f); }), - policy(opts.policy), + : policy(opts.policy), session(s), rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( @@ -63,9 +54,6 @@ struct TFE_Context { log_device_placement( opts.session_options.options.config.log_device_placement()) {} - const std::unique_ptr thread_pool; - std::function)> runner; - const TFE_ContextDevicePlacementPolicy policy; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index b9618420f0..4bf24fec2c 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -255,22 +255,17 @@ Status KernelAndDevice::InitOp(Device* device, const NodeDef& ndef, out->device_ = device; out->kernel_.reset(k); out->flib_ = nullptr; - out->runner_ = nullptr; - out->default_runner_ = [](std::function f) { f(); }; return s; } // static Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, - std::function)>* runner, KernelAndDevice* out) { OpKernel* k = nullptr; Status s = flib->CreateKernel(ndef, &k); out->device_ = 
flib->device(); out->kernel_.reset(k); out->flib_ = flib; - out->runner_ = runner; - out->default_runner_ = [](std::function f) { f(); }; return s; } @@ -301,11 +296,10 @@ Status KernelAndDevice::Run(std::vector* input_tensors, if (stats != nullptr) { params.track_allocations = true; } - if (runner_ == nullptr) { - params.runner = &default_runner_; - } else { - params.runner = runner_; - } + // TODO(apassos): use a thread pool. + std::function)> runner = + [](std::function f) { f(); }; + params.runner = &runner; OpKernelContext context(¶ms); device_->Compute(kernel_.get(), &context); diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index fa5f839977..7fede4dae9 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -169,7 +169,6 @@ class KernelAndDevice { // the FunctionLibraryRuntime is pushed on to the caller (see locking in // c_api.cc). static Status Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, - std::function)>* runner, KernelAndDevice* out); // TODO(ashankar): Remove this static Status InitOp(Device* device, const NodeDef& ndef, @@ -189,8 +188,6 @@ class KernelAndDevice { private: std::unique_ptr kernel_; Device* device_; - std::function)>* runner_; - std::function)> default_runner_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc index ab0b535e1a..643153058c 100644 --- a/tensorflow/c/eager/runtime_test.cc +++ b/tensorflow/c/eager/runtime_test.cc @@ -92,8 +92,8 @@ TEST(KernelAndDevice, Run) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - Status s = KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &kernel); + Status s = + KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel); ASSERT_TRUE(s.ok()) << s; std::vector outputs; s = kernel.Run(&inputs, &outputs, nullptr); @@ -158,8 +158,8 @@ void 
BM_KernelAndDeviceInit(int iters) { KernelAndDevice k(nullptr); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { - TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &k)); + TF_CHECK_OK( + KernelAndDevice::Init(ndef, env.function_library_runtime(), &k)); } } BENCHMARK(BM_KernelAndDeviceInit); @@ -179,8 +179,8 @@ void BM_KernelAndDeviceRun(int iters) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &kernel)); + TF_CHECK_OK( + KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel)); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr)); -- GitLab From b053b1006abdfcf1f790a729a412001ebbaf679f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 21:25:22 -0800 Subject: [PATCH 050/311] Improve error handling in strided_slice_op to fail more gracefully and return an error status instead of crashing. PiperOrigin-RevId: 187126888 --- tensorflow/core/kernels/strided_slice_op.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 7745effe2a..1e3e92a68a 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -109,17 +109,27 @@ class StridedSliceOp : public OpKernel { if (is_identity) { VLOG(1) << "Strided slice identity "; Tensor tmp; - CHECK(tmp.CopyFrom(input, final_shape)); + OP_REQUIRES(context, tmp.CopyFrom(input, final_shape), + errors::Internal("Copy failed")); context->set_output(0, tmp); return; } // Optimization #2, slice is memory contiguous (only occurs in dim 0) if (slice_dim0 && IsDim0SliceAligned(input.shape(), begin[0], end[0])) { - CHECK_GE(input.dims(), 1); // Otherwise, is_identity should be true. 
+ OP_REQUIRES(context, input.dims() >= 1, + errors::InvalidArgument( + "Input must have rank at least 1, got: ", input.dims())); + // Otherwise, is_identity should be true. VLOG(1) << "Strided slice dim 0: " << input.shape().DebugString(); + OP_REQUIRES( + context, begin[0] <= end[0], + errors::InvalidArgument("begin[0] (", begin[0], + ") must less or equal to end[0] (", end[0])); + Tensor slice = input.Slice(begin[0], end[0]); Tensor tmp; - CHECK(tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)); + OP_REQUIRES(context, tmp.CopyFrom(slice, final_shape), + errors::Internal("Copy failed")); context->set_output(0, tmp); return; } @@ -238,7 +248,8 @@ class StridedSliceGradOp : public OpKernel { if (processing_shape.dims() == 0) { auto in = context->input(4); - CHECK(result->CopyFrom(in, processing_shape)); + OP_REQUIRES(context, result->CopyFrom(in, processing_shape), + errors::Internal("Copy failed")); return; } -- GitLab From e4b294e080dc5f339d1e639e1e9907b53461b754 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 01:02:36 -0800 Subject: [PATCH 051/311] Add documentation to Grappler RewriterConfig to give a short description for each of the optimizer on what they do. PiperOrigin-RevId: 187143156 --- tensorflow/core/protobuf/rewriter_config.proto | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 504ed5d819..875e4663db 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -30,12 +30,17 @@ message RewriterConfig { } // Optimize tensor layouts (default is ON) + // e.g. This will try to use NCHW layout on GPU which is faster. Toggle layout_optimizer = 1; // Fold constants (default is ON) + // Statically infer the value of tensors when possible, and materialize the + // result using constants. 
Toggle constant_folding = 3; // Arithmetic optimizations (default is ON) + // e.g. Simplify arithmetic ops; merge ops with same value (like constants). Toggle arithmetic_optimization = 7; // Control dependency optimizations (default is ON). + // Remove redundant control dependencies, which may enable other optimization. Toggle dependency_optimization = 8; // Loop optimizations (default is OFF). Toggle loop_optimization = 9; @@ -49,12 +54,20 @@ message RewriterConfig { NO_MEM_OPT = 1; // Driven by manual op-level annotations. MANUAL = 2; + // Driven by heuristics. The behavior of these heuristics is subject to // change. Currently includes an experimental recomputation and swapping // heuristics. Manual annotations are respected, but additional nodes are // selected automatically. + + // Swapping heuristic will move a tensor from the GPU to the CPU and move + // it back when needed to reduce peak memory usage. SWAPPING_HEURISTICS = 4; + // Recomputation heuristics will recompute ops (such as Relu activation) + // during backprop instead of storing them, reducing peak memory usage. RECOMPUTATION_HEURISTICS = 5; + // Scheduling will split big ops such as AddN and try to enforce a schedule + // of the new computations that decreases peak memory usage. SCHEDULING_HEURISTICS = 6; // Use any combination of swapping and recomputation heuristics. HEURISTICS = 3; -- GitLab From 7f25c9d127e8535170d0575c038fd42222887dd4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 06:00:21 -0800 Subject: [PATCH 052/311] Enable dynamic function calls. These are compiled just in time by inserting a call to compile. 
PiperOrigin-RevId: 187165096 --- tensorflow/contrib/py2tf/__init__.py | 4 +- tensorflow/contrib/py2tf/converters/BUILD | 13 +-- .../contrib/py2tf/converters/call_trees.py | 76 +++++++------- .../py2tf/converters/call_trees_test.py | 16 +++ .../py2tf/converters/converter_test_base.py | 32 ++++-- tensorflow/contrib/py2tf/impl/api.py | 99 ++++++++++++++----- 6 files changed, 163 insertions(+), 77 deletions(-) diff --git a/tensorflow/contrib/py2tf/__init__.py b/tensorflow/contrib/py2tf/__init__.py index 379fa7fd5c..6531183cb5 100644 --- a/tensorflow/contrib/py2tf/__init__.py +++ b/tensorflow/contrib/py2tf/__init__.py @@ -23,6 +23,7 @@ from __future__ import print_function from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl.api import convert +from tensorflow.contrib.py2tf.impl.api import converted_call from tensorflow.contrib.py2tf.impl.api import graph_ready from tensorflow.contrib.py2tf.impl.api import to_code from tensorflow.contrib.py2tf.impl.api import to_graph @@ -30,7 +31,8 @@ from tensorflow.contrib.py2tf.pyct.transformer import PyFlowParseError from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'to_graph', 'to_code', 'convert', 'graph_ready', 'utils', 'PyFlowParseError' + 'to_graph', 'to_code', 'convert', 'graph_ready', 'converted_call', 'utils', + 'PyFlowParseError' ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 42baaaaba7..78f46bc05f 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -46,6 +46,7 @@ py_library( visibility = ["//tensorflow:__subpackages__"], deps = [ ":converters", + "//tensorflow/contrib/py2tf/pyct", "//tensorflow/contrib/py2tf/pyct/static_analysis", "//tensorflow/contrib/py2tf/utils", "@gast_archive//:gast", @@ -59,7 +60,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", 
"//tensorflow/python:client_testlib", ], ) @@ -70,7 +70,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -81,7 +80,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -92,7 +90,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/py2tf/impl", "//tensorflow/python:client_testlib", ], ) @@ -103,7 +101,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -114,7 +111,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -125,7 +121,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -136,7 +131,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -157,7 +151,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -168,7 +161,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -184,7 +176,6 @@ py_test( ], deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) diff --git a/tensorflow/contrib/py2tf/converters/call_trees.py b/tensorflow/contrib/py2tf/converters/call_trees.py index 1050ba654c..f18f9f6086 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees.py +++ b/tensorflow/contrib/py2tf/converters/call_trees.py @@ -27,6 +27,7 @@ import types import gast from tensorflow.contrib.py2tf.pyct import anno +from 
tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.contrib.py2tf.pyct import parser from tensorflow.contrib.py2tf.pyct import templates from tensorflow.contrib.py2tf.pyct import transformer @@ -72,9 +73,8 @@ class CallTreeTransformer(transformer.Base): self.uncompiled_modules = uncompiled_modules self.nocompile_decorators = nocompile_decorators - # pylint:disable=invalid-name - def _resolve_name(self, node): + """Used to resolve decorator info.""" if isinstance(node, gast.Call): return self._resolve_name(node.func) if isinstance(node, gast.Name): @@ -99,7 +99,13 @@ class CallTreeTransformer(transformer.Base): (owner_type, node.attr)) return None + def _function_is_compilable(self, target_entity): + """Determines whether an entity can be compiled at all.""" + # TODO(mdan): This is just a placeholder. Implement. + return not isinstance(target_entity, types.BuiltinFunctionType) + def _should_compile(self, node, fqn): + """Determines whether an entity should be compiled in the context.""" for i in range(1, len(fqn)): if fqn[:i] in self.uncompiled_modules: return False @@ -141,33 +147,6 @@ class CallTreeTransformer(transformer.Base): return True - def _determine_function_owner(self, m): - # TODO(mdan): The parent type should be known at analysis. Use that instead. - if hasattr(m, 'im_class'): # Python 2 - return m.im_class - if hasattr(m, '__qualname__'): # Python 3 - # Object attributes: should be bound to "self". - if hasattr(m, '__self__'): - return type(m.__self__) - - # Class attributes: should have the owner name in their namespace. - qn = m.__qualname__.split('.') - if len(qn) < 2: - return None - owner_name, func_name = qn[-2:] - if func_name != m.__name__: - raise ValueError('Inconsistent names detected ' - '(__qualname__[1] = "%s", __name__ = "%s") for %s.' % - (func_name, m.__name__, m)) - if owner_name == '': - return None - if owner_name not in self.context.namespace: - raise ValueError( - 'Could not resolve name "%s" while analyzing %s. 
Namespace:\n%s' % - (owner_name, m, self.context.namespace)) - return self.context.namespace[owner_name] - return None - def _rename_compilable_function(self, node): assert anno.hasanno(node.func, 'live_val') assert anno.hasanno(node.func, 'fqn') @@ -182,7 +161,11 @@ class CallTreeTransformer(transformer.Base): target_fqn, live_entity=target_entity) do_rename = True else: - owner_type = self._determine_function_owner(target_entity) + if anno.hasanno(node.func, 'parent_type'): + owner_type = anno.getanno(node.func, 'parent_type') + else: + # Fallback - not reliable. + owner_type = inspect_utils.getmethodclass(target_entity) new_name, do_rename = self.context.namer.compiled_function_name( target_fqn, live_entity=target_entity, owner_type=owner_type) @@ -202,9 +185,32 @@ class CallTreeTransformer(transformer.Base): """ return templates.replace(template, func=node.func, original_args=node.args) - def _function_is_compilable(self, target_entity): - # TODO(mdan): This is just a placeholder. Implement. - return not isinstance(target_entity, types.BuiltinFunctionType) + def _converted_call(self, node): + """Inlines a dynamic conversion for a dynamic function.""" + # TODO(mdan): Pass information on the statically compiled functions. + # Having access to the statically compiled functions can help avoid + # unnecessary compilation. + # For example, this would lead to function `a` being compiled twice: + # + # def a(): + # v = b + # b() + # def b(): + # a() + # + # This is really a problem with recursive calls, which currently can + # only be gated by a static condition, and should be rare. + # TODO(mdan): It probably makes sense to use dynamic conversion every time. + # Before we could convert all the time though, we'd need a reasonable + # caching mechanism. 
+ template = """ + py2tf_api.converted_call(func, True, False, {}, original_args) + """ + call_expr = templates.replace( + template, func=node.func, original_args=node.args) + return call_expr[0].value + + # pylint:disable=invalid-name def visit_Expr(self, node): if isinstance(node.value, gast.Call): @@ -245,9 +251,9 @@ class CallTreeTransformer(transformer.Base): raise NotImplementedError('py_func with return values') else: if self.context.recursive: - raise NotImplementedError('Could not resolve target function.') + node = self._converted_call(node) else: - # TODO(mdan): Double check. Is this reachable code? + # Unresolved functions are allowed in non-recursive mode. pass return node diff --git a/tensorflow/contrib/py2tf/converters/call_trees_test.py b/tensorflow/contrib/py2tf/converters/call_trees_test.py index 777648dc0b..d482a9ef78 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees_test.py +++ b/tensorflow/contrib/py2tf/converters/call_trees_test.py @@ -47,6 +47,21 @@ class CallTreesTest(converter_test_base.TestCase): result.renamed_test_fn_1 = renamed_test_fn_1 self.assertEquals(3, result.test_fn_2(1)) + def test_dynamic_function(self): + + def test_fn_1(): + raise ValueError('This should be masked by the mock.') + + def test_fn_2(f): + return f() + 3 + + node = self.parse_and_analyze(test_fn_2, {}) + node = call_trees.transform(node, self.ctx, (), ()) + + with self.compiled(node) as result: + # 10 = 7 (from the mock) + 3 (from test_fn_2) + self.assertEquals(10, result.test_fn_2(test_fn_1)) + def test_simple_methods(self): class TestClass(object): @@ -59,6 +74,7 @@ class CallTreesTest(converter_test_base.TestCase): node = self.parse_and_analyze( TestClass.test_fn_2, {'TestClass': TestClass}, + namer=converter_test_base.FakeNoRenameNamer(), arg_types={'self': (TestClass.__name__, TestClass)}) node = call_trees.transform(node, self.ctx, (), ()) diff --git a/tensorflow/contrib/py2tf/converters/converter_test_base.py 
b/tensorflow/contrib/py2tf/converters/converter_test_base.py index afa5c2f96f..1f98d8469c 100644 --- a/tensorflow/contrib/py2tf/converters/converter_test_base.py +++ b/tensorflow/contrib/py2tf/converters/converter_test_base.py @@ -25,6 +25,7 @@ from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.pyct import compiler from tensorflow.contrib.py2tf.pyct import context from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.pyct import pretty_printer from tensorflow.contrib.py2tf.pyct import qual_names from tensorflow.contrib.py2tf.pyct.static_analysis import activity from tensorflow.contrib.py2tf.pyct.static_analysis import live_values @@ -52,26 +53,43 @@ class FakeNamer(object): return ('renamed_%s' % '_'.join(original_fqn)), True +class FakeNoRenameNamer(FakeNamer): + + def compiled_function_name(self, original_fqn, **_): + return str(original_fqn), False + + class TestCase(test.TestCase): """Base class for unit tests in this module. Contains relevant utilities.""" @contextlib.contextmanager def compiled(self, node, *symbols): - source = '' + source = None + + self.dynamic_calls = [] + def converted_call(*args): + """Mock version of api.converted_call.""" + self.dynamic_calls.append(args) + return 7 + try: result, source = compiler.ast_to_object(node) - result.tf = self.make_fake_tf(*symbols) + result.tf = self.make_fake_mod('fake_tf', *symbols) result.py2tf_utils = utils + result.py2tf_api = self.make_fake_mod('fake_api', converted_call) yield result except Exception: # pylint:disable=broad-except - print('Offending compiled code:\n%s' % source) + if source is None: + print('Offending AST:\n%s' % pretty_printer.fmt(node, color=False)) + else: + print('Offending compiled code:\n%s' % source) raise - def make_fake_tf(self, *symbols): - fake_tf = imp.new_module('fake_tf') + def make_fake_mod(self, name, *symbols): + fake_mod = imp.new_module(name) for s in symbols: - setattr(fake_tf, s.__name__, s) - return fake_tf + 
setattr(fake_mod, s.__name__, s) + return fake_mod def attach_namespace(self, module, **ns): for k, v in ns.items(): diff --git a/tensorflow/contrib/py2tf/impl/api.py b/tensorflow/contrib/py2tf/impl/api.py index 29d2e038a7..48100aac32 100644 --- a/tensorflow/contrib/py2tf/impl/api.py +++ b/tensorflow/contrib/py2tf/impl/api.py @@ -26,7 +26,9 @@ import six from tensorflow.contrib.py2tf.impl import config from tensorflow.contrib.py2tf.impl import conversion from tensorflow.contrib.py2tf.pyct import compiler +from tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.utils import builtins from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_inspect @@ -110,28 +112,7 @@ def convert(recursive=False, verbose=False, arg_types=None): @wraps(f) def wrapper(*args, **kwargs): - """Wrapper that calls the compiled version of the wrapped function.""" - partial_types = () - arg_values = {} - arg_names = tf_inspect.getargspec(f)[0] - for name, arg in zip(arg_names, args): - arg_values[name] = arg - arg_class = arg.__class__ - # If arg_value_hints specifies any name, use that instead. - if name not in arg_types: - arg_types[name] = (arg_class.__name__, arg_class) - if name == 'self' and tf_inspect.isclass(arg_class): - # Annotated methods need to specify that their owner type is partial, - # otherwise other members they call will not be converted. - partial_types = (arg_class,) - wrapped = to_graph( - f, - recursive=recursive, - verbose=verbose, - arg_values=arg_values, - arg_types=arg_types, - partial_types=partial_types) - return wrapped(*args, **kwargs) + return converted_call(f, recursive, verbose, arg_types, *args, **kwargs) # Sometimes the decorator is just desugared, making it impossible to detect. # This attribute makes detection easier. 
@@ -141,6 +122,78 @@ def convert(recursive=False, verbose=False, arg_types=None): return decorator +def converted_call(f, recursive, verbose, arg_types, *args, **kwargs): + """Compiles a function call inline.""" + # TODO(mdan): This needs cleanup. + # In particular, we may want to avoid renaming functions altogether. + + if conversion.is_whitelisted_for_graph(f): + return f(*args, **kwargs) + + unknown_arg_value = object() # Sentinel for arguments of unknown value + + if tf_inspect.isbuiltin(f): + return builtins.dynamic_builtin(f, *args, **kwargs) + + if tf_inspect.isfunction(f) or tf_inspect.ismethod(f): + # Regular functions + target_entity = f + arg_map_target = f + effective_args = args + f_class = inspect_utils.getmethodclass(f) + + if f_class is not None: + partial_types = (f_class,) + else: + partial_types = () + + elif tf_inspect.isclass(f): + # Constructors + target_entity = f + arg_map_target = f.__init__ + effective_args = (unknown_arg_value,) + args + partial_types = () + + elif hasattr(f, '__call__') and hasattr(f, '__class__'): + # Callable objects + target_entity = f.__call__ + arg_map_target = f.__call__ + effective_args = (f,) + args + partial_types = (f.__class__,) + + else: + NotImplementedError('unknown callable type "%s"' % type(f)) + + arg_values = tf_inspect.getcallargs(arg_map_target, *args, **kwargs) + for name, arg in arg_values.items(): + if arg is unknown_arg_value: + continue + arg_class = arg.__class__ + # If arg_value_hints specifies any name, use that instead. + if name not in arg_types: + arg_types[name] = (arg_class.__name__, arg_class) + + # When called from within a decorator, this is the only indication that + # the function is a method - it appears that the decorator is applied + # before the method is bound. 
+ if not partial_types: + if 'self' in arg_values: + if tf_inspect.isclass(arg_values['self'].__class__): + partial_types = (arg_values['self'].__class__,) + elif 'cls' in arg_values: + if tf_inspect.isclass(arg_values['cls']): + partial_types = (arg_values['cls'],) + + converted_f = to_graph( + target_entity, + recursive=recursive, + verbose=verbose, + arg_values=arg_values, + arg_types=arg_types, + partial_types=partial_types) + return converted_f(*effective_args, **kwargs) + + def to_graph(e, recursive=True, verbose=False, @@ -189,7 +242,7 @@ def to_graph(e, # The compiled code should see everything the entry function saw. # TODO(mdan): This might not work well if the call tree spans modules? if tf_inspect.isfunction(e): - compiled_node.__dict__.update(six.get_function_globals(e)) + compiled_node.__dict__.update(inspect_utils.getnamespace(e)) compiled_fn = getattr(compiled_node, name) if verbose: -- GitLab From 0c47d9d9622724aabd41425aad482637b2245499 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 08:29:52 -0800 Subject: [PATCH 053/311] Tensorflow: adds additional debugging info to feed_dict failure condition. If you have a large feed dict, determining the type of each object can be difficult, and this additional debugging info helped me in such a case. PiperOrigin-RevId: 187179551 --- tensorflow/python/client/session.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index f3c4fecdc0..5737047c4b 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -1085,7 +1085,10 @@ class BaseSession(SessionInterface): if isinstance(subfeed_val, ops.Tensor): raise TypeError('The value of a feed cannot be a tf.Tensor object. ' 'Acceptable feed values include Python scalars, ' - 'strings, lists, numpy ndarrays, or TensorHandles.') + 'strings, lists, numpy ndarrays, or TensorHandles.' 
+ 'For reference, the tensor object was ' + + str(feed_val) + ' which was passed to the ' + 'feed with key ' + str(feed) + '.') subfeed_dtype = subfeed_t.dtype.as_numpy_dtype if isinstance(subfeed_val, int) and _convert_to_numpy_obj( -- GitLab From 67545cd70ebec13c18159d105b0ce17bbfc7ac44 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 27 Feb 2018 09:52:00 -0800 Subject: [PATCH 054/311] Uses the new automatic control dependencies code for functions. PiperOrigin-RevId: 187189552 --- tensorflow/python/eager/function.py | 73 ++++++++++++++--------- tensorflow/python/eager/function_test.py | 14 ++--- tensorflow/python/eager/graph_callable.py | 12 +++- 3 files changed, 61 insertions(+), 38 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index b3317bd323..655eaf3a1e 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -36,6 +36,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_module from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.util import compat @@ -162,31 +163,15 @@ class CapturingGraph(ops.Graph): op_def=None, compute_shapes=True, compute_device=True): - # TODO(apassos) probably control flow has to be handled delicately here as - # in if a resource is accessed inside a control flow context we need the - # control dependency to point to something outside the context which is - # guaranteed to happen after the access. - # # TODO(apassos) this should do some form of alias analysis as ops which # forward the resources such as Identity and Switch can cause serialization # to fail. 
- resource_inputs = set() - control_inputs = set() for i, inp in enumerate(inputs): if inp.graph is not self: inputs[i] = capture_value(self.captures, inp, inp.dtype, inp.op.name) - inp = inputs[i] - if inp.dtype == dtypes_module.resource: - if inp.name in self._last_op_using_resource_tensor: - control_inputs.add(self._last_op_using_resource_tensor[inp.name]) - resource_inputs.add(inp.name) - with self.control_dependencies(list(control_inputs)): - op = super(CapturingGraph, self).create_op( - op_type, inputs, dtypes, input_types, name, attrs, op_def, - compute_shapes, compute_device) - for name in resource_inputs: - self._last_op_using_resource_tensor[name] = op - return op + return super(CapturingGraph, self).create_op( + op_type, inputs, dtypes, input_types, name, attrs, op_def, + compute_shapes, compute_device) # TODO(apassos): it'd be really nice if we could scope this registration. @@ -636,13 +621,15 @@ def _defun_internal(name, func, args, kwds): for collection in curr_graph.collections: tmp_graph.get_collection_ref(collection)[:] = curr_graph.get_collection( collection) - with tmp_graph.as_default(): + with tmp_graph.as_default(), AutomaticControlDependencies() as a: func_inputs = _get_defun_inputs(args) def convert(x): if x is None: return None - return ops.convert_to_tensor_or_indexed_slices(x) + x = ops.convert_to_tensor_or_indexed_slices(x) + x = a.mark_as_return(x) + return x with capture_tensors(captures): this_tape = tape.push_new_tape() @@ -887,7 +874,36 @@ class AutomaticControlDependencies(object): self._returned_tensors = set() def mark_as_return(self, tensor): + """Acts like identity but marks the `Tensor` as a return value. + + This will possibly return a copy of the `Tensor`. Usage: + + ``` + with AutomaticControlDependencies() as a: + ... + t = a.mark_as_return(t) + _ = ...(t...) # i.e. it's safe to use t here + ``` + + Args: + tensor: the `Tensor` to be marked + + Returns: + a copy of the `Tensor`. 
+ """ + if isinstance(tensor, ops.IndexedSlices): + values = array_ops.identity(tensor.values) + indices = array_ops.identity(tensor.indices) + self._returned_tensors.add(indices) + self._returned_tensors.add(values) + return ops.IndexedSlices(values, indices, dense_shape=tensor.dense_shape) + # We want to make the return values depend on the stateful operations, but + # we don't want to introduce a cycle, so we make the return value the result + # of a new identity operation that the stateful operations definitely don't + # depend on. + tensor = array_ops.identity(tensor) self._returned_tensors.add(tensor) + return tensor def __enter__(self): if context.in_eager_mode(): @@ -1008,7 +1024,8 @@ class AutomaticControlDependencies(object): for op in new_operations: control_inputs = set() # Ensure stateful ops run - if self._graph._registered_ops[op.type].is_stateful: # pylint: disable=protected-access + if (op.type not in self._graph._registered_ops # pylint: disable=protected-access + or self._graph._registered_ops[op.type].is_stateful): # pylint: disable=protected-access ops_which_must_run.add(op) # Ignore switches (they're handled separately) if op.type == "Switch" and op.inputs[0].dtype == dtypes_module.resource: @@ -1044,9 +1061,10 @@ class AutomaticControlDependencies(object): # Ensure all ops which must run do run for r in self._returned_tensors: - r.op._add_control_inputs( # pylint: disable=protected-access - [o for o in ops_which_must_run - if o._control_flow_context is r.op._control_flow_context]) # pylint: disable=protected-access + if ops_which_must_run: + r.op._add_control_inputs( # pylint: disable=protected-access + [o for o in ops_which_must_run + if o._control_flow_context is r.op._control_flow_context]) # pylint: disable=protected-access def automatic_control_dependencies(f): @@ -1066,8 +1084,7 @@ def automatic_control_dependencies(f): def wrapper(*args, **kwds): with AutomaticControlDependencies() as a: result = f(*args, **kwds) - for t in 
nest.flatten(result): - a.mark_as_return(t) - return result + result_flat = [a.mark_as_return(t) for t in nest.flatten(result)] + return nest.pack_sequence_as(result, result_flat) return tf_decorator.make_decorator(f, wrapper) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 431d9388c0..b9cde16867 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -606,7 +606,7 @@ class AutomaticControlDependenciesTest(test.TestCase): v.assign(v + 1) v.assign(2 * v) val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(), 4.0) def testCondMustRun(self): @@ -626,7 +626,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False}), 5.0) self.assertAllEqual(val.eval(feed_dict={p: True}), 6.0) @@ -647,7 +647,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) one = constant_op.constant(1.0) - c.mark_as_return(one) + one = c.mark_as_return(one) one.eval(feed_dict={p: False}) self.assertAllEqual(v.read_value().eval(), 5.0) one.eval(feed_dict={p: True}) @@ -681,7 +681,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) with ops.name_scope('final'): val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False, q: False}), 3.0) self.assertAllEqual(val.eval(feed_dict={p: False, q: True}), 6.0) self.assertAllEqual(val.eval(feed_dict={p: True, q: True}), 7.0) @@ -703,7 +703,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False}), 5.0) 
self.assertAllEqual(val.eval(feed_dict={p: True}), 5.0) @@ -724,7 +724,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False}), 6.0) self.assertAllEqual(val.eval(feed_dict={p: True}), 12.0) @@ -745,7 +745,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) v.assign(v * 2) val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False}), 10.0) self.assertAllEqual(val.eval(feed_dict={p: True}), 20.0) diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index 62106bf0e2..623f3564ad 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -279,9 +279,12 @@ def _graph_callable_internal(func, shape_and_dtypes): # scope's view of which variables exist. variable_captures = _VariableCapturingScope() with variable_captures.initializing_scope(), function.capture_tensors( - captures): + captures), function.AutomaticControlDependencies() as a: func_outputs = func(*func_inputs) - outputs_list = nest.flatten(func_outputs) + outputs_list = nest.flatten(func_outputs) + for i, x in enumerate(outputs_list): + if x is not None: + outputs_list[i] = a.mark_as_return(x) if len(outputs_list) == 1 and outputs_list[0] is None: outputs_list = [] output_shapes = [x.shape for x in outputs_list] @@ -294,9 +297,12 @@ def _graph_callable_internal(func, shape_and_dtypes): # knows about all variables. 
tmp_graph.clear_resource_control_flow_state() with variable_captures.capturing_scope(), function.capture_tensors( - captures): + captures), function.AutomaticControlDependencies() as a: captured_outputs = func(*func_inputs) captured_outlist = nest.flatten(captured_outputs) + for i, x in enumerate(captured_outlist): + if x is not None: + captured_outlist[i] = a.mark_as_return(x) capturing_operations = tmp_graph.get_operations()[ len(initializing_operations):] -- GitLab From f62f168fc3d59e3f067423fc39b4f5c3bfe2527a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 10:05:22 -0800 Subject: [PATCH 055/311] Make crosstools ready for introduction of c++-link-nodeps-dynamic-library PiperOrigin-RevId: 187191730 --- third_party/gpus/crosstool/CROSSTOOL_clang.tpl | 7 +++++++ third_party/toolchains/gpus/crosstool/CROSSTOOL | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/third_party/gpus/crosstool/CROSSTOOL_clang.tpl b/third_party/gpus/crosstool/CROSSTOOL_clang.tpl index e4363d6045..2f09473ee2 100644 --- a/third_party/gpus/crosstool/CROSSTOOL_clang.tpl +++ b/third_party/gpus/crosstool/CROSSTOOL_clang.tpl @@ -49,6 +49,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-lstdc++" } @@ -75,6 +76,7 @@ toolchain { name: "alwayslink" flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" action: "c++-link-executable" flag_group { flag: "-Wl,-no-as-needed" @@ -116,6 +118,7 @@ toolchain { } flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-Wl,-z,relro,-z,now" } @@ -161,6 +164,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { # Stamp the binary with a unique identifier. 
flag: "-Wl,--build-id=md5" @@ -176,6 +180,7 @@ toolchain { action: "c++-compile" action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag:"-no-canonical-prefixes" } @@ -199,6 +204,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-B/usr/bin/" } @@ -246,6 +252,7 @@ toolchain { } flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" action: "c++-link-executable" flag_group { flag: "-Wl,--gc-sections" diff --git a/third_party/toolchains/gpus/crosstool/CROSSTOOL b/third_party/toolchains/gpus/crosstool/CROSSTOOL index a47e0c7cd7..16ee2f82c6 100644 --- a/third_party/toolchains/gpus/crosstool/CROSSTOOL +++ b/third_party/toolchains/gpus/crosstool/CROSSTOOL @@ -53,6 +53,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-lstdc++" } @@ -79,6 +80,7 @@ toolchain { name: "alwayslink" flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" action: "c++-link-executable" flag_group { flag: "-Wl,-no-as-needed" @@ -120,6 +122,7 @@ toolchain { } flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-Wl,-z,relro,-z,now" } @@ -165,6 +168,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { # Stamp the binary with a unique identifier. 
flag: "-Wl,--build-id=md5" @@ -180,6 +184,7 @@ toolchain { action: "c++-compile" action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag:"-no-canonical-prefixes" } @@ -203,6 +208,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-B/usr/bin/" } @@ -250,6 +256,7 @@ toolchain { } flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" action: "c++-link-executable" flag_group { flag: "-Wl,--gc-sections" -- GitLab From 0e5458fb95b0b146838a3c61de31bb9497c613ce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 10:05:38 -0800 Subject: [PATCH 056/311] Implement partial constant folding of AddN and AccumulateNV2. Change AccumulateNV2 to AddN if all inputs are constant, since constant folding doesn't work for the fake node type. PiperOrigin-RevId: 187191772 --- .../grappler/optimizers/constant_folding.cc | 78 ++++++++++++ .../optimizers/constant_folding_test.cc | 115 ++++++++++++++++-- 2 files changed, 184 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index a5417aaa51..32c8a9b2f5 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1493,6 +1493,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const bool is_aggressive = opt_level_ == RewriterConfig::AGGRESSIVE; for (int i = 0; i < output->node_size(); ++i) { NodeDef* node = output->mutable_node(i); + // Remove Shuffle or Reverse op over scalar values. 
if (use_shape_info && (IsShuffle(*node) || IsReverse(*node) || IsTranspose(*node))) { @@ -1839,6 +1840,83 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, std::swap(*node->mutable_input(parent_const_input), *op_child_node->mutable_input(non_const_leaf_input)); graph_modified_ = true; + continue; + } + + // Partial constant folding for associative operators: + // Split AddN/AccumulateNV2 to enable partial + // folding of ops when more than one but not all inputs are constant. + // For AddN and AccumulateNV2, we may furthermore reorder inputs, since + // addition is commutative. + // TODO(rmlarsen): Concat/Pack/ParallelConcat which are not commutative, so + // we have to preserve order and can only push consecutive runs of constant + // inputs into sub-nodes. + if (IsAggregate(*node) && IsCommutative(*node) && + NumNonControlInputs(*node) > 2) { + const int num_control_inputs = + node->input_size() - NumNonControlInputs(*node); + std::vector const_inputs; + std::vector nonconst_inputs; + for (int i = 0; i < node->input_size(); ++i) { + const string& input = node->input(i); + const NodeDef* input_node = node_map_->GetNode(NodeName(input)); + CHECK(input_node != nullptr) << input; + if (!IsControlInput(input) && IsReallyConstant(*input_node)) { + const_inputs.push_back(i); + } else { + // Non-const and control inputs. + nonconst_inputs.push_back(i); + } + } + // Promote AccumulateNV2 with all constant inputs to AddN, since it is + // a fake node that cannot be constant folded by itself. 
+ if (const_inputs.size() == NumNonControlInputs(*node) && + node->op() == "AccumulateNV2") { + node->set_op("AddN"); + node->mutable_attr()->erase("shape"); + graph_modified_ = true; + continue; + } + const string new_node_name = OptimizedNodeName( + *node, strings::StrCat("_partial_split_", const_inputs.size())); + if (1 < const_inputs.size() && + const_inputs.size() < NumNonControlInputs(*node) && + !node_map_->NodeExists(new_node_name)) { + NodeDef* added_node = output->add_node(); + *added_node = *node; + // Always use AddN for the constant node, since AccumulateNV2 is a fake + // node that cannot be constant folded, since it does not have a kernel. + added_node->set_op("AddN"); + added_node->mutable_attr()->erase("shape"); + added_node->set_name(new_node_name); + node_map_->AddNode(added_node->name(), added_node); + added_node->clear_input(); + for (int i : const_inputs) { + added_node->add_input(node->input(i)); + node_map_->UpdateOutput(NodeName(node->input(i)), node->name(), + added_node->name()); + } + + // Overwrite the first const input with the added node. + node->set_input(const_inputs[0], added_node->name()); + node_map_->AddOutput(added_node->name(), node->name()); + nonconst_inputs.push_back(const_inputs[0]); + // Compact the remaining inputs to the original node. 
+ std::sort(nonconst_inputs.begin(), nonconst_inputs.end()); + int idx = 0; + for (int i : nonconst_inputs) { + if (idx != i) { + node->set_input(idx, node->input(i)); + } + ++idx; + } + node->mutable_input()->DeleteSubrange(nonconst_inputs.size(), + const_inputs.size() - 1); + (*node->mutable_attr())["N"].set_i(node->input_size() - + num_control_inputs); + (*added_node->mutable_attr())["N"].set_i(const_inputs.size()); + graph_modified_ = true; + } } } diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index c6540192d7..3149e1d53e 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -187,20 +187,21 @@ TEST_F(ConstantFoldingTest, NeutralElement) { Output bias_add2 = ops::BiasAdd(s.WithOpName("bias_add2"), zeros, bias); Output sub1 = ops::Sub(s.WithOpName("sub1"), x, zeros); Output sub2 = ops::Sub(s.WithOpName("sub2"), zeros, y); - Output addn = - ops::AddN(s.WithOpName("addn"), - {mul1, mul2, mul3, mul4, mul5, mul6, div1, div2, matmul1, - matmul2, add1, add2, bias_add1, bias_add2, sub1, sub2}); + Output concat = + ops::Concat(s.WithOpName("concat"), + {mul1, mul2, mul3, mul4, mul5, mul6, div1, div2, matmul1, + matmul2, add1, add2, bias_add1, bias_add2, sub1, sub2}, + 0); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - item.fetch = {"addn", "matmul3", "matmul4"}; + item.fetch = {"concat", "matmul3", "matmul4"}; ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(27, output.node_size()); + EXPECT_EQ(28, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); const string& name = node.name(); @@ -414,7 +415,6 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) { GraphDef output; Status status = 
optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - LOG(INFO) << output.DebugString(); EXPECT_EQ(15, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { @@ -1547,8 +1547,105 @@ TEST_F(ConstantFoldingTest, SwitchIdenticalInputs) { EXPECT_EQ(6, found); } +TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { + std::function addn_fun = + [](const Scope& scope, InputList inputs) { + return ops::AddN(scope, inputs); + }; + std::function accumulate_fun = + [](const Scope& scope, InputList inputs) { + return ops::AccumulateNV2(scope, inputs, TensorShape({2, 2})); + }; + for (bool use_add_n : {true, false}) { + auto fun = use_add_n ? addn_fun : accumulate_fun; + const string op_name = use_add_n ? "AddN" : "AccumulateNV2"; + Scope s = Scope::NewRootScope(); + Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({2, 2}))); + Output y = ops::Placeholder(s.WithOpName("y"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({2, 2}))); + Output z = ops::Placeholder(s.WithOpName("z"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({2, 2}))); + Output c1 = ops::Const(s.WithOpName("c1"), 1.0f, {2, 2}); + Output c2 = ops::Const(s.WithOpName("c2"), 2.0f, {2, 2}); + Output c3 = ops::Const(s.WithOpName("c3"), 3.0f, {2, 2}); + Output acc0 = fun(s.WithOpName("acc0"), {c1, c2, c3}); + Output acc1 = fun(s.WithOpName("acc1"), {x, y, z}); + Output acc2 = fun(s.WithOpName("acc2"), {c1, x, y}); + Output acc3 = fun(s.WithOpName("acc3"), {c1, c2, z}); + Output acc4 = fun(s.WithOpName("acc4"), {c1, y, c2}); + Output acc5 = fun(s.WithOpName("acc5"), {x, c1, c2}); + Output acc6 = fun(s.WithOpName("acc6"), {x, c1, y, c2}); + Output concat = ops::Concat(s.WithOpName("concat"), + {acc0, acc1, acc2, acc3, acc4, acc5, acc6}, 0); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch = {"concat"}; + + ConstantFolding optimizer(nullptr /* cpu_device */); + GraphDef output; + Status status = 
optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(17, output.node_size()); + for (const NodeDef& node : output.node()) { + if (node.name() == "acc0") { + EXPECT_EQ("Const", node.op()); + } + if (node.name() == "acc1") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("z", node.input(2)); + } + if (node.name() == "acc2") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("c1", node.input(0)); + EXPECT_EQ("x", node.input(1)); + EXPECT_EQ("y", node.input(2)); + } + if (node.name() == "acc3") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("ConstantFolding/acc3_partial_split_2", node.input(0)); + EXPECT_EQ("z", node.input(1)); + } + if (node.name() == "acc4") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("ConstantFolding/acc4_partial_split_2", node.input(0)); + EXPECT_EQ("y", node.input(1)); + } + if (node.name() == "acc5") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("ConstantFolding/acc5_partial_split_2", node.input(1)); + } + if (node.name() == "acc6") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("ConstantFolding/acc6_partial_split_2", node.input(1)); + EXPECT_EQ("y", node.input(2)); + } + if (StringPiece(node.name()).starts_with("ConstantFolding/")) { + EXPECT_EQ("Const", node.op()); + } + } + + std::vector fetch = {"acc0"}; + auto tensors_expected = EvaluateNodes(item.graph, fetch); + auto tensors = EvaluateNodes(output, fetch); + EXPECT_EQ(1, tensors_expected.size()); + EXPECT_EQ(1, tensors.size()); + test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); + } +} + } // namespace } // namespace grappler } // namespace tensorflow - -// LocalWords: NewRootScope -- GitLab From 
e929b16dc89f62a41bcaba57b98ddd221bf9bf68 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Tue, 27 Feb 2018 10:25:17 -0800 Subject: [PATCH 057/311] Lint fixes. PiperOrigin-RevId: 187194778 --- tensorflow/python/util/tf_inspect.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index a7cead5555..4ab8a72a83 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -46,8 +46,10 @@ def getargspec(object): # pylint: disable=redefined-builtin def getfullargspec(obj): # pylint: disable=redefined-builtin - """TFDecorator-aware replacement for inspect.getfullargspec and fallback to - inspect.getargspec in Python 2. + """TFDecorator-aware replacement for `inspect.getfullargspec`/`getargspec`. + + This wrapper uses `inspect.getfullargspec` if available and falls back to + `inspect.getargspec` in Python 2. Args: obj: A callable, possibly decorated. -- GitLab From e20be23387a6c1b72f3e34d03d4206c3211c921a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 10:27:28 -0800 Subject: [PATCH 058/311] Make block-based pruning more general, allowing it to operate on higher-dimensional arrays that can be squeezed to 2-dimensional. PiperOrigin-RevId: 187195105 --- tensorflow/contrib/model_pruning/README.md | 2 +- .../contrib/model_pruning/python/pruning.py | 21 ++++++++++++------- .../model_pruning/python/pruning_test.py | 17 +++++++++++++++ 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/model_pruning/README.md b/tensorflow/contrib/model_pruning/README.md index d286750c25..52b659c69f 100644 --- a/tensorflow/contrib/model_pruning/README.md +++ b/tensorflow/contrib/model_pruning/README.md @@ -134,7 +134,7 @@ $ bazel-bin/$examples_dir/cifar10/cifar10_eval --run_once ### Block Sparsity -For some hardware architectures, it may be beneficial to induce spatially correlated sparsity. 
To train models in which the weight tensors have block sparse structure, set *block_height* and *block_width* hyperparameters to the desired block configuration (2x2, 4x4, 4x1, 1x8, etc). Currently, block sparsity is supported for weight tensors with rank 2 only. The matrix is partitioned into non-overlapping blocks of size *[block_height, block_dim]* and the either the average or max absolute value in this block is taken as a proxy for the entire block (set by *block_pooling_function* hyperparameter). +For some hardware architectures, it may be beneficial to induce spatially correlated sparsity. To train models in which the weight tensors have block sparse structure, set *block_height* and *block_width* hyperparameters to the desired block configuration (2x2, 4x4, 4x1, 1x8, etc). Currently, block sparsity is only supported for weight tensors which can be squeezed to rank 2. The matrix is partitioned into non-overlapping blocks of size *[block_height, block_dim]* and the either the average or max absolute value in this block is taken as a proxy for the entire block (set by *block_pooling_function* hyperparameter). The convolution layer tensors are always pruned used block dimensions of [1,1]. ## References diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py index d16af9da19..86963be4b8 100644 --- a/tensorflow/contrib/model_pruning/python/pruning.py +++ b/tensorflow/contrib/model_pruning/python/pruning.py @@ -523,7 +523,8 @@ class Pruning(object): """Performs block-granular masking of the weights. Block pruning occurs only if the block_height or block_width is > 1 and - if the weight tensor has ndims = 2. Otherwise, elementwise pruning occurs. + if the weight tensor, when squeezed, has ndims = 2. Otherwise, elementwise + pruning occurs. Args: weights: The weight tensor that needs to be masked. threshold: The current threshold value. 
The function will compute a new @@ -540,7 +541,8 @@ class Pruning(object): Raises: ValueError: if block pooling function is not AVG or MAX """ - if weights.get_shape().ndims != 2 or self._block_dim == [1, 1]: + squeezed_weights = array_ops.squeeze(weights) + if squeezed_weights.get_shape().ndims != 2 or self._block_dim == [1, 1]: return self._update_mask(weights, threshold) if self._block_pooling_function not in ['AVG', 'MAX']: @@ -549,9 +551,11 @@ class Pruning(object): with ops.name_scope(weights.op.name + '_pruning_ops'): abs_weights = math_ops.abs( - array_ops.reshape( - weights, [1, weights.get_shape()[0], - weights.get_shape()[1], 1])) + array_ops.reshape(weights, [ + 1, + squeezed_weights.get_shape()[0], + squeezed_weights.get_shape()[1], 1 + ])) pool_window = [self._block_dim[0], self._block_dim[1]] pooled_weights = nn_ops.pool( abs_weights, @@ -572,9 +576,10 @@ class Pruning(object): array_ops.ones(self._block_dim)) sliced_mask = array_ops.slice( updated_mask, [0, 0], - [weights.get_shape()[0], - weights.get_shape()[1]]) - return smoothed_threshold, sliced_mask + [squeezed_weights.get_shape()[0], + squeezed_weights.get_shape()[1]]) + return smoothed_threshold, array_ops.reshape(sliced_mask, + array_ops.shape(weights)) def _get_mask_assign_ops(self): # Make sure the assignment ops have not already been added to the list diff --git a/tensorflow/contrib/model_pruning/python/pruning_test.py b/tensorflow/contrib/model_pruning/python/pruning_test.py index 1767b4bb94..89e6571319 100644 --- a/tensorflow/contrib/model_pruning/python/pruning_test.py +++ b/tensorflow/contrib/model_pruning/python/pruning_test.py @@ -140,6 +140,23 @@ class PruningTest(test.TestCase): [0.0, -0.3, 0.0, -0.4]]) expected_mask = [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]] + self._blockMasking(param_list + ["block_pooling_function=MAX"], weights_max, + expected_mask) + self._blockMasking(param_list + ["block_pooling_function=AVG"], weights_avg, + expected_mask) + + def 
testBlockMaskingWithHigherDimensions(self): + param_list = ["block_height=2", "block_width=2", "threshold_decay=0"] + + # Weights as in testBlockMasking, but with one extra dimension. + weights_avg = constant_op.constant( + [[[0.1, 0.1, 0.2, 0.2], [0.1, 0.1, 0.2, 0.2], [0.3, 0.3, 0.4, 0.4], + [0.3, 0.3, 0.4, 0.4]]]) + weights_max = constant_op.constant( + [[[0.1, 0.0, 0.2, 0.0], [0.0, -0.1, 0.0, -0.2], [0.3, 0.0, 0.4, 0.0], + [0.0, -0.3, 0.0, -0.4]]]) + expected_mask = [[[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]]] + self._blockMasking(param_list + ["block_pooling_function=MAX"], weights_max, expected_mask) self._blockMasking(param_list + ["block_pooling_function=AVG"], -- GitLab From 38bda430f4d302c762bc2a0b74721d82b9c5cca4 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 27 Feb 2018 10:30:41 -0800 Subject: [PATCH 059/311] [TF CriticalSection] Bugfix: deref the Mutex before calling done_() This avoids an error wherein the Mutex destructor is called from the same thread as its threadpool, thus leading to a pthread 35 error. If the mutex is dereferenced before done_ is called, then the destruction is delayed until after done_() is called, and this happens in a different thread from the threadpool. PiperOrigin-RevId: 187195628 --- tensorflow/core/kernels/mutex_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mutex_ops.cc b/tensorflow/core/kernels/mutex_ops.cc index b8b1fc7679..b02a584d73 100644 --- a/tensorflow/core/kernels/mutex_ops.cc +++ b/tensorflow/core/kernels/mutex_ops.cc @@ -190,7 +190,6 @@ class MutexLockOp : public AsyncOpKernel { // End of bound arguments. 
const Status& s, Mutex::SharedLockReleaser&& lock) { - core::ScopedUnref unref(mutex); VLOG(2) << "Finished locking mutex " << mutex << " with lock: " << lock.shared_lock.get() << " status: " << s.ToString(); @@ -199,6 +198,7 @@ class MutexLockOp : public AsyncOpKernel { } else { c->SetStatus(s); } + mutex->Unref(); done_(); }, std::move(done), std::placeholders::_1, std::placeholders::_2)); -- GitLab From 8ccc858d11f913e63cf3e35523bc3121684c2a82 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 10:49:41 -0800 Subject: [PATCH 060/311] Add 8bit Tanh support to tflite Allow output datatypes for custom ops to be more than the output types used in the graph. When an op has multiple outputs, some of them not used will be optimized away. This results in a failure. The change in propagate_array_data_types.cc fix this. PiperOrigin-RevId: 187198815 --- .../contrib/lite/kernels/activations.cc | 40 ++++++++++++++++++- .../contrib/lite/kernels/activations_test.cc | 29 ++++++++++++++ .../propagate_array_data_types.cc | 7 +++- 3 files changed, 72 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/activations.cc b/tensorflow/contrib/lite/kernels/activations.cc index 6acded3091..093761c43c 100644 --- a/tensorflow/contrib/lite/kernels/activations.cc +++ b/tensorflow/contrib/lite/kernels/activations.cc @@ -63,6 +63,33 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArrayCopy(input->dims)); } +TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + if (input->type == kTfLiteUInt8) { + static constexpr int kInputIntegerBits = 4; + + const double input_real_multiplier = 
+ input->params.scale * + static_cast(1 << (31 - kInputIntegerBits)); + + QuantizeMultiplierGreaterThanOne(input_real_multiplier, + &data->input_multiplier, + &data->input_left_shift); + data->input_range_radius = + CalculateInputRadius(kInputIntegerBits, data->input_left_shift); + } + + return context->ResizeTensor(context, output, + TfLiteIntArrayCopy(input->dims)); +} + TfLiteStatus SigmoidPrepare(TfLiteContext* context, TfLiteNode* node) { OpData* data = reinterpret_cast(node->user_data); @@ -180,6 +207,7 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); TfLiteTensor* input = GetInput(context, node, 0); TfLiteTensor* output = GetOutput(context, node, 0); switch (input->type) { @@ -191,6 +219,14 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { for (; in < in_end; in++, out++) *out = std::tanh(*in); return kTfLiteOk; } break; + case kTfLiteUInt8: { + optimized_ops::Tanh(GetTensorData(input), GetTensorDims(input), + input->params.zero_point, data->input_range_radius, + data->input_multiplier, data->input_left_shift, + GetTensorData(output), + GetTensorDims(output)); + return kTfLiteOk; + } break; default: context->ReportError(context, "Only float32 supported currently."); return kTfLiteError; @@ -376,8 +412,8 @@ TfLiteRegistration* Register_RELU6() { } TfLiteRegistration* Register_TANH() { - static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, - activations::GenericPrepare, + static TfLiteRegistration r = {activations::Init, activations::Free, + activations::TanhPrepare, activations::TanhEval}; return &r; } diff --git a/tensorflow/contrib/lite/kernels/activations_test.cc b/tensorflow/contrib/lite/kernels/activations_test.cc index 302e52b96d..b9a96e3f79 100644 --- a/tensorflow/contrib/lite/kernels/activations_test.cc +++ b/tensorflow/contrib/lite/kernels/activations_test.cc @@ -52,6 +52,14 @@ 
class BaseActivationsOpModel : public SingleOpModel { BuildInterpreter({GetShape(input_)}); } + BaseActivationsOpModel(BuiltinOperator type, const TensorData &input, + const TensorData &output) { + input_ = AddInput(input); + output_ = AddOutput(output); + SetBuiltinOp(type, BuiltinOptions_NONE, 0); + BuildInterpreter({GetShape(input_)}); + } + protected: int input_; int output_; @@ -143,6 +151,27 @@ TEST(FloatActivationsOpTest, Tanh) { }))); } +TEST(QuantizedActivationsOpTest, Tanh) { + QuantizedActivationsOpModel m( + BuiltinOperator_TANH, + /*input=*/{TensorType_UINT8, {1, 2, 4, 1}, -8, 8}, + /*output=*/{TensorType_UINT8, {1, 2, 4, 1}, -1, 1}); + m.SetInput({ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + 0.0, -0.999987, 0.964027, 0.999329, // + -0.996078, -0.96402, 0.99999, 0.76159, // + }, + 4 * (1. / 256)))); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({128, 0, 251, 255, 0, 5, 255, 226})); +} + TEST(FloatActivationsOpTest, Sigmoid) { FloatActivationsOpModel m(BuiltinOperator_LOGISTIC, /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc index f0d107232b..bde947f78d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc @@ -97,10 +97,13 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { SetDataTypeForAllOutputs(model, op, data_type); } else if (op->type == OperatorType::kTensorFlowUnsupported) { auto* unsupported_op = static_cast(op); - if (unsupported_op->output_data_types.size() != op->outputs.size()) { + // Some output tensors from the op could be eliminated by optimization. 
+ // This can make unsupported_op->output_data_types have more elements than + // op->outputs. + if (unsupported_op->output_data_types.size() < op->outputs.size()) { return false; } - for (int i = 0; i < unsupported_op->output_data_types.size(); ++i) { + for (int i = 0; i < op->outputs.size(); ++i) { auto output = op->outputs[i]; auto data_type = unsupported_op->output_data_types[i]; model->GetArray(output).data_type = data_type; -- GitLab From 6a6661bbdce2172d27bf501e26baf09e8a658657 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 27 Feb 2018 11:01:10 -0800 Subject: [PATCH 061/311] Function optimization: added an optimizer to automatically inline functions in order to enable Grappler to optimize the body of functions. Inlining also reduces the overhead of evaluating function. PiperOrigin-RevId: 187200883 --- .../core/grappler/grappler_item_builder.cc | 108 -------- .../core/grappler/grappler_item_builder.h | 7 - .../grappler/grappler_item_builder_test.cc | 199 --------------- tensorflow/core/grappler/optimizers/BUILD | 38 +++ .../grappler/optimizers/function_optimizer.cc | 148 +++++++++++ .../grappler/optimizers/function_optimizer.h | 43 ++++ .../optimizers/function_optimizer_test.cc | 98 ++++++++ tensorflow/core/grappler/utils/BUILD | 32 +++ tensorflow/core/grappler/utils/functions.cc | 140 +++++++++++ tensorflow/core/grappler/utils/functions.h | 39 +++ .../core/grappler/utils/functions_test.cc | 232 ++++++++++++++++++ .../core/grappler/utils/grappler_test.cc | 4 +- 12 files changed, 772 insertions(+), 316 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/function_optimizer.cc create mode 100644 tensorflow/core/grappler/optimizers/function_optimizer.h create mode 100644 tensorflow/core/grappler/optimizers/function_optimizer_test.cc create mode 100644 tensorflow/core/grappler/utils/functions.cc create mode 100644 tensorflow/core/grappler/utils/functions.h create mode 100644 tensorflow/core/grappler/utils/functions_test.cc diff --git 
a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index 5ac52eefe1..606807b9e9 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -518,113 +518,5 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( return new_item; } -std::unique_ptr GrapplerItemFromFunctionDef( - const FunctionDef& func, - const std::unordered_map& func_attr, - const FunctionDefLibrary& library) { - if (func.signature().name().empty()) { - LOG(ERROR) << "function name must be specified."; - return nullptr; - } - std::unique_ptr new_item(new GrapplerItem()); - new_item->id = func.signature().name(); - - std::unordered_map port_map; - - // Add the function inputs as placeholder - for (const auto& inp : func.signature().input_arg()) { - NodeDef* ph = new_item->graph.add_node(); - ph->set_name(inp.name()); - ph->set_op("Placeholder"); - if (inp.type() != DT_INVALID) { - (*ph->mutable_attr())["T"].set_type(inp.type()); - } else { - auto it = func_attr.find(inp.type_attr()); - if (it == func_attr.end()) { - LOG(ERROR) << "Unknown type attribute " << inp.type_attr() - << " for function input " << inp.name(); - return nullptr; - } else { - (*ph->mutable_attr())["T"] = it->second; - } - } - port_map[inp.name()] = inp.name(); - } - - // Add the function body to the graph. - FunctionLibraryDefinition func_def(OpRegistry::Global(), library); - - for (const NodeDef& node : func.node_def()) { - NodeDef* new_node = new_item->graph.add_node(); - *new_node = node; - // Replace the placeholder attribute values with the specified value. - for (auto& attr : *new_node->mutable_attr()) { - const string& ph_name = attr.second.placeholder(); - auto it = func_attr.find(ph_name); - if (it != func_attr.end()) { - attr.second = it->second; - } - } - - // Functions use a custom format to encode connectivity. Map these custom - // strings to regular ones. 
- const OpRegistrationData* registration; - Status status = func_def.LookUp(node.op(), &registration); - if (!status.ok()) { - LOG(ERROR) << "Op " << node.op() << " not registered: " << status; - return nullptr; - } - - tensorflow::NameRangeMap inputs; - tensorflow::NameRangeMap outputs; - status = tensorflow::NameRangesForNode(node, registration->op_def, &inputs, - &outputs); - if (!status.ok()) { - LOG(ERROR) << "Op " << node.op() << " invalid: " << status; - return nullptr; - } - for (const auto& name_range : outputs) { - string port_prefix = - strings::StrCat(node.name(), ":", name_range.first, ":"); - int index_start = name_range.second.first; - int index_end = name_range.second.second; - for (int i = index_start; i < index_end; ++i) { - string port_id = strings::StrCat(port_prefix, i - index_start); - string port_name = strings::StrCat(node.name(), ":", i); - port_map[port_id] = port_name; - } - } - } - - for (auto& node : *new_item->graph.mutable_node()) { - // Rewrite the inputs to use the normal naming convention. - for (int i = 0; i < node.input_size(); ++i) { - const string& input = node.input(i); - if (IsControlInput(input)) { - // No need to remap control dependencies. - continue; - } else { - auto it = port_map.find(input); - if (it == port_map.end()) { - LOG(ERROR) << "Unknown input: " << input; - return nullptr; - } - node.set_input(i, it->second); - } - } - } - - // Add the function outputs to the list of fetch nodes. - for (const auto& out : func.signature().output_arg()) { - new_item->fetch.emplace_back(out.name()); - } - // Add the function inputs to the list of feeds.
- for (const auto& inp : func.signature().input_arg()) { - new_item->feed.emplace_back(inp.name(), Tensor()); - } - - return new_item; -} - } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/grappler_item_builder.h b/tensorflow/core/grappler/grappler_item_builder.h index e892a3f556..c877d91163 100644 --- a/tensorflow/core/grappler/grappler_item_builder.h +++ b/tensorflow/core/grappler/grappler_item_builder.h @@ -58,13 +58,6 @@ struct ItemConfig { std::unique_ptr GrapplerItemFromMetaGraphDef( const string& id, const MetaGraphDef& meta_graph, const ItemConfig& cfg); -// Factory method for creating a GrapplerItem from a FunctionDef. -// Returns nullptr if the given function def cannot be converted. -std::unique_ptr GrapplerItemFromFunctionDef( - const FunctionDef& func, - const std::unordered_map& func_attr, - const FunctionDefLibrary& library); - } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/grappler_item_builder_test.cc b/tensorflow/core/grappler/grappler_item_builder_test.cc index 68437b6041..ef95992af7 100644 --- a/tensorflow/core/grappler/grappler_item_builder_test.cc +++ b/tensorflow/core/grappler/grappler_item_builder_test.cc @@ -280,205 +280,6 @@ TEST_F(GrapplerItemBuilderTest, GraphWithFunctions) { ASSERT_TRUE(item != nullptr); } -TEST_F(GrapplerItemBuilderTest, FromSimpleFunctionDef) { - const Tensor kTwo = test::AsScalar(2); - FunctionDef func = FunctionDefHelper::Define( - // Name - "XTimesTwo", - // Args - {"x: T"}, - // Return values - {"y: T"}, - // Attr def - {"T: {float, double, int32, int64}"}, - // Nodes - { - {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, - {{"scale"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, - {{"y"}, "Mul", {"x", "scale"}, {{"T", "$T"}}}, - }); - - std::unordered_map func_attr; - func_attr["T"].set_type(DT_FLOAT); - FunctionDefLibrary library; - std::unique_ptr item = - GrapplerItemFromFunctionDef(func, 
func_attr, library); - CHECK(item); - EXPECT_EQ("XTimesTwo", item->id); - EXPECT_EQ(4, item->graph.node_size()); - EXPECT_EQ(std::vector({"y"}), item->fetch); - EXPECT_EQ(1, item->feed.size()); - EXPECT_EQ("x", item->feed[0].first); - - for (const NodeDef &node : item->graph.node()) { - if (node.name() == "x") { - EXPECT_EQ("Placeholder", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); - EXPECT_EQ(0, node.input_size()); - } else if (node.name() == "two") { - EXPECT_EQ("Const", node.op()); - EXPECT_EQ(0, node.input_size()); - } else if (node.name() == "scale") { - EXPECT_EQ("Cast", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("DstT").type()); - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("two:0", node.input(0)); - } else if (node.name() == "y") { - EXPECT_EQ("Mul", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("x", node.input(0)); - EXPECT_EQ("scale:0", node.input(1)); - } - } -} - -TEST_F(GrapplerItemBuilderTest, FromFunctionDefWithMultiOutputNodes) { - // Gradient graph for the Subtract operation - std::vector nodes = { - {{"sx"}, "Shape", {"x"}}, - {{"sy"}, "Shape", {"y"}}, - {{"gx"}, "Identity", {"dz"}}, - {{"gy"}, "Neg", {"dz"}}, - {{"rx", "ry"}, "BroadcastGradientArgs", {"sx", "sy"}}, - {{"sum_gx"}, "Sum", {"gx", "rx"}}, - {{"dx"}, "Reshape", {"sum_gx", "sx"}}, - {{"sum_gy"}, "Sum", {"gy", "ry"}}, - {{"dy"}, "Reshape", {"sum_gy", "sy"}}, - }; - - for (auto &n : nodes) { - // "BroadcastGradientArgs" doesn't need any attrs. 
- if (n.attr.empty() && n.op != "BroadcastGradientArgs") { - n.attr = {{"T", "$T"}}; - } - } - FunctionDef func = FunctionDefHelper::Define( - // Name - "SubGrad", - // Arg defs - {"x: T", "y: T", "dz: T"}, - // Ret val defs - {"dx: T", "dy: T"}, - // Attr defs - {{"T: {half, float, double}"}}, - // Nodes - nodes); - - std::unordered_map func_attr; - func_attr["T"].set_type(DT_FLOAT); - FunctionDefLibrary library; - std::unique_ptr item = - GrapplerItemFromFunctionDef(func, func_attr, library); - CHECK(item); - EXPECT_EQ("SubGrad", item->id); - EXPECT_EQ(12, item->graph.node_size()); - EXPECT_EQ(std::vector({"dx", "dy"}), item->fetch); - EXPECT_EQ(3, item->feed.size()); - EXPECT_EQ("x", item->feed[0].first); - EXPECT_EQ("y", item->feed[1].first); - EXPECT_EQ("dz", item->feed[2].first); - - for (const NodeDef &node : item->graph.node()) { - if (node.name() == "x" || node.name() == "y" || node.name() == "dz") { - EXPECT_EQ("Placeholder", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); - EXPECT_EQ(0, node.input_size()); - } else if (node.name() == "rx") { - EXPECT_EQ("BroadcastGradientArgs", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("sx:0", node.input(0)); - EXPECT_EQ("sy:0", node.input(1)); - } else if (node.name() == "sum_gx") { - EXPECT_EQ("Sum", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("gx:0", node.input(0)); - EXPECT_EQ("rx:0", node.input(1)); - } else if (node.name() == "sum_gy") { - EXPECT_EQ("Sum", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("gy:0", node.input(0)); - EXPECT_EQ("rx:1", node.input(1)); - } - } -} - -TEST_F(GrapplerItemBuilderTest, FromFunctionDefWithNestedFuncs) { - FunctionDefLibrary library; - *library.add_function() = FunctionDefHelper::Define( - // Name - "Swap", - // Args - {"i0: T", "i1: T"}, - // Return values - {"o0: T", "o1: T"}, - // Attr def - {"T: {float, double}"}, - // Nodes - {{{"o0"}, "Identity", {"i1"}, {{"T", "$T"}}}, - {{"o1"}, "Identity", {"i0"}, {{"T", 
"$T"}}}}); - - FunctionDef func = FunctionDefHelper::Create( - // Name - "ManySwapsFirst", - // Args - {"x: float", "y: float"}, - // Return values - {"o: float"}, - // attr def - {}, - // Nodes - // o = x*x + y*y. Furthermore, The 1st swap depends on x2, and - // y2 depends on the 2nd swap. The 2nd swap has data dependency - // on the 1st swap. - {{{"a0"}, "Swap", {"x", "y"}, {{"T", DT_FLOAT}}, {"x2"}}, - {{"a1"}, "Swap", {"a0:o0:0", "a0:o1:0"}, {{"T", DT_FLOAT}}}, - {{"x2"}, "Mul", {"x", "x"}, {{"T", DT_FLOAT}}}, - {{"y2"}, "Mul", {"y", "y"}, {{"T", DT_FLOAT}}, {"a1"}}, - {{"o"}, "Add", {"x2:z:0", "y2:z:0"}, {{"T", DT_FLOAT}}}}, - {{"o", "o:z:0"}}); - - std::unordered_map func_attr; - func_attr["T"].set_type(DT_FLOAT); - std::unique_ptr item = - GrapplerItemFromFunctionDef(func, func_attr, library); - - for (const NodeDef &node : item->graph.node()) { - if (node.name() == "x" || node.name() == "y") { - EXPECT_EQ("Placeholder", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); - EXPECT_EQ(0, node.input_size()); - } else if (node.name() == "a0") { - EXPECT_EQ("Swap", node.op()); - EXPECT_EQ(3, node.input_size()); - EXPECT_EQ("x", node.input(0)); - EXPECT_EQ("y", node.input(1)); - EXPECT_EQ("^x2", node.input(2)); - } else if (node.name() == "a1") { - EXPECT_EQ("Swap", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("a0:0", node.input(0)); - EXPECT_EQ("a0:1", node.input(1)); - } else if (node.name() == "x2") { - EXPECT_EQ("Mul", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("x", node.input(0)); - EXPECT_EQ("x", node.input(1)); - } else if (node.name() == "y2") { - EXPECT_EQ("Mul", node.op()); - EXPECT_EQ(3, node.input_size()); - EXPECT_EQ("y", node.input(0)); - EXPECT_EQ("y", node.input(1)); - EXPECT_EQ("^a1", node.input(2)); - } else if (node.name() == "o") { - EXPECT_EQ("Add", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("x2:0", node.input(0)); - EXPECT_EQ("y2:0", node.input(1)); - } - } -} - } // namespace } // 
namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index a52d1c8df2..bd41854c41 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -132,6 +132,44 @@ tf_cc_test( ], ) +cc_library( + name = "function_optimizer", + srcs = ["function_optimizer.cc"], + hdrs = [ + "function_optimizer.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":graph_optimizer", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler/utils:functions", + ], +) + +tf_cc_test( + name = "function_optimizer_test", + srcs = ["function_optimizer_test.cc"], + deps = [ + ":function_optimizer", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", + "//tensorflow/core:all_kernels", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:direct_session", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/utils:grappler_test", + ], +) + cc_library( name = "graph_rewriter", srcs = ["graph_rewriter.cc"], diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc new file mode 100644 index 0000000000..efc4f2f4bd --- /dev/null +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -0,0 +1,148 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/function_optimizer.h" +#include +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/utils/functions.h" + +namespace tensorflow { +namespace grappler { + +Status InlineFunction(const NodeDef& node, const FunctionDef& func, + GraphDef* graph) { + const std::unordered_map attr(node.attr().begin(), + node.attr().end()); + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, attr, library); + + std::unordered_map input_nodes; + for (int i = 0; i < func.signature().input_arg_size(); ++i) { + const OpDef::ArgDef& arg = func.signature().input_arg(i); + input_nodes[arg.name()] = i; + } + + // Add an IdentityN op to hook the function inputs to: this ensures that + // they're all evaluated before the evaluation of the function body starts. 
+ NodeDef* func_inputs = graph->add_node(); + func_inputs->set_name(strings::StrCat(node.name(), "/", "inlined_inputs")); + func_inputs->set_op("IdentityN"); + *func_inputs->mutable_input() = node.input(); + AttrValue::ListValue* type_list = + (*func_inputs->mutable_attr())["T"].mutable_list(); + for (const OpDef::ArgDef& arg : func.signature().input_arg()) { + auto it = attr.find(arg.type_attr()); + if (it == attr.end()) { + return errors::InvalidArgument("Invalid input argument ", arg.name(), + " for function ", node.op(), + " instantiated by ", node.name()); + } + type_list->add_type(it->second.type()); + } + + for (NodeDef& func_body_node : *item->graph.mutable_node()) { + if (input_nodes.find(func_body_node.name()) != input_nodes.end()) { + // Turn input placeholders into identity nodes + if (IsPlaceholder(func_body_node)) { + func_body_node.set_op("Identity"); + } + CHECK_EQ(0, func_body_node.input_size()); + int input_id = input_nodes[func_body_node.name()]; + func_body_node.add_input( + strings::StrCat(func_inputs->name(), ":", input_id)); + } else { + // Update the input names. + for (string& input : *func_body_node.mutable_input()) { + input = strings::StrCat(node.name(), "/", input); + } + } + + // Add the node name as a prefix to avoid collisions after inlining + func_body_node.set_name( + strings::StrCat(node.name(), "/", func_body_node.name())); + + // Move the node to the main graph + graph->add_node()->Swap(&func_body_node); + } + + // Add an IdentityN op to hook the function outputs to: this ensures that the + // function body is fully evaluated before its fanout gets scheduled. 
+ NodeDef* func_outputs = graph->add_node(); + func_outputs->set_name(node.name()); + func_outputs->set_op("IdentityN"); + type_list = (*func_outputs->mutable_attr())["T"].mutable_list(); + for (const OpDef::ArgDef& arg : func.signature().output_arg()) { + auto it = attr.find(arg.type_attr()); + if (it == attr.end()) { + return errors::InvalidArgument("Invalid output argument ", arg.name(), + " for function ", node.op(), + " instantiated by ", node.name()); + } + type_list->add_type(it->second.type()); + func_outputs->add_input(strings::StrCat(node.name(), "/", arg.name())); + } + + return Status::OK(); +} + +Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { + std::unordered_map functions; + for (const FunctionDef& func : item.graph.library().function()) { + if (func.attr().count("_noinline") == 0) { + functions[func.signature().name()] = &func; + } + } + + // Nothing to do. + if (functions.empty()) { + *optimized_graph = item.graph; + return Status::OK(); + } + + // Inline functions when possible. + for (const NodeDef& node : item.graph.node()) { + auto it = functions.find(node.op()); + if (it == functions.end()) { + *optimized_graph->add_node() = node; + } else { + TF_RETURN_IF_ERROR(InlineFunction(node, *it->second, optimized_graph)); + } + } + + // TODO(bsteiner): specialize the implementation of functions that can't be + // inlined based on the context in which they're instantiated. + + // TODO(bsteiner): trim the library to remove unused function definitions + *optimized_graph->mutable_library() = item.graph.library(); + *optimized_graph->mutable_versions() = item.graph.versions(); + + return Status::OK(); +} + +void FunctionOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimized_graph, + double result) { + // Nothing to do for FunctionOptimizer. 
+} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.h b/tensorflow/core/grappler/optimizers/function_optimizer.h new file mode 100644 index 0000000000..5c80226e9d --- /dev/null +++ b/tensorflow/core/grappler/optimizers/function_optimizer.h @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_GRAPPLER_OPTIMIZERS_FUNCTION_OPTIMIZER_H_ +#define TENSORFLOW_GRAPPLER_OPTIMIZERS_FUNCTION_OPTIMIZER_H_ + +#include "tensorflow/core/grappler/optimizers/graph_optimizer.h" + +namespace tensorflow { +namespace grappler { + +// Remap TensorFlow subgraphs onto alternative operations or collection of +// operations to make the overall graph more efficient. 
+class FunctionOptimizer : public GraphOptimizer { + public: + FunctionOptimizer() {} + ~FunctionOptimizer() override {} + + string name() const override { return "function_optimizer"; }; + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimized_graph, double result) override; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_GRAPPLER_OPTIMIZERS_FUNCTION_OPTIMIZER_H_ diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc new file mode 100644 index 0000000000..b8e05a5296 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -0,0 +1,98 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/function_optimizer.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/utils/grappler_test.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class FunctionOptimizerTest : public GrapplerTest {}; + +TEST_F(FunctionOptimizerTest, SimpleFunction) { + // Build a graph to compute y = XTimesTwo(x) + GrapplerItem item; + constexpr char device[] = "/device:CPU:0"; + item.graph = test::function::GDef( + {test::function::NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("y", "XTimesTwo", {"x"}, {{"T", DT_FLOAT}}, device), + test::function::NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, device)}, + // FunctionLib + { + test::function::XTimesTwo(), + }); + + FunctionOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + int count = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "y/inlined_inputs") { + count++; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("x", node.input(0)); + } else if (node.name() == "y/x") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/inlined_inputs:0", node.input(0)); + } else if (node.name() == "y/two") { + count++; + EXPECT_EQ("Const", node.op()); + } else if (node.name() == "y/scale") { + count++; + EXPECT_EQ("Cast", node.op()); + } else if (node.name() == "y/y") { + count++; + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("y/x", node.input(0)); + EXPECT_EQ("y/scale:0", node.input(1)); + } else if (node.name() == "y") { + count++; + 
EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/y", node.input(0)); + } else if (node.name() == "z") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y", node.input(0)); + } + } + EXPECT_EQ(7, count); + + item.fetch = {"z"}; + Tensor pi(DT_FLOAT, {}); + pi.flat()(0) = 3.14f; + item.feed.emplace_back("x", pi); + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index 5d32609434..fc05713494 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -146,3 +146,35 @@ cc_library( "//tensorflow/core/grappler:utils", ], ) + +cc_library( + name = "functions", + srcs = [ + "functions.cc", + ], + hdrs = ["functions.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + ], +) + +tf_cc_test( + name = "functions_test", + srcs = ["functions_test.cc"], + deps = [ + ":functions", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:all_kernels", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc new file mode 100644 index 0000000000..37b00e0a30 --- /dev/null +++ b/tensorflow/core/grappler/utils/functions.cc @@ -0,0 +1,140 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/grappler/utils/functions.h" + +#include + +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/graph_def_util.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/grappler/utils.h" + +namespace tensorflow { +namespace grappler { + +std::unique_ptr GrapplerItemFromFunctionDef( + const FunctionDef& func, + const std::unordered_map& func_attr, + const FunctionDefLibrary& library) { + if (func.signature().name().empty()) { + LOG(ERROR) << "function name must be specified."; + return nullptr; + } + std::unique_ptr new_item(new GrapplerItem()); + new_item->id = func.signature().name(); + + std::unordered_map port_map; + + // Add the function inputs as placeholder + for (const auto& inp : func.signature().input_arg()) { + NodeDef* ph = new_item->graph.add_node(); + ph->set_name(inp.name()); + ph->set_op("Placeholder"); + if (inp.type() != DT_INVALID) { + (*ph->mutable_attr())["T"].set_type(inp.type()); + } else { + auto it = func_attr.find(inp.type_attr()); + if (it == func_attr.end()) { + LOG(ERROR) << "Unknown type attribute " << inp.type_attr() + << " for function input " << 
inp.name(); + return nullptr; + } else { + (*ph->mutable_attr())["T"] = it->second; + } + } + port_map[inp.name()] = inp.name(); + } + + // Add the function body to the graph. + FunctionLibraryDefinition func_def(OpRegistry::Global(), library); + + for (const NodeDef& node : func.node_def()) { + NodeDef* new_node = new_item->graph.add_node(); + *new_node = node; + // Replace the placeholder attribute values with the specified value. + for (auto& attr : *new_node->mutable_attr()) { + const string& ph_name = attr.second.placeholder(); + auto it = func_attr.find(ph_name); + if (it != func_attr.end()) { + attr.second = it->second; + } + } + + // Functions use a custom format to encode connectivity. Map these custom + // strings to regular ones. + const OpRegistrationData* registration; + Status status = func_def.LookUp(node.op(), &registration); + if (!status.ok()) { + LOG(ERROR) << "Op " << node.op() << " not registered: " << status; + return nullptr; + } + + tensorflow::NameRangeMap inputs; + tensorflow::NameRangeMap outputs; + status = tensorflow::NameRangesForNode(node, registration->op_def, &inputs, + &outputs); + if (!status.ok()) { + LOG(ERROR) << "Op " << node.op() << " invalid: " << status; + return nullptr; + } + for (const auto& name_range : outputs) { + string port_prefix = + strings::StrCat(node.name(), ":", name_range.first, ":"); + int index_start = name_range.second.first; + int index_end = name_range.second.second; + for (int i = index_start; i < index_end; ++i) { + string port_id = strings::StrCat(port_prefix, i - index_start); + string port_name = strings::StrCat(node.name(), ":", i); + port_map[port_id] = port_name; + } + } + } + + for (auto& node : *new_item->graph.mutable_node()) { + // Rewrite the inputs to use the normal naming convention. + for (int i = 0; i < node.input_size(); ++i) { + const string& input = node.input(i); + if (IsControlInput(input)) { + // No need to remap control dependencies.
+ continue; + } else { + auto it = port_map.find(input); + if (it == port_map.end()) { + LOG(ERROR) << "Unknown input: " << input; + return nullptr; + } + node.set_input(i, it->second); + } + } + } + + // Add the function outputs to the list of fetch nodes. + for (const auto& out : func.signature().output_arg()) { + new_item->fetch.emplace_back(out.name()); + } + // Add the function inputs to the list of feeds. + for (const auto& inp : func.signature().input_arg()) { + new_item->feed.emplace_back(inp.name(), Tensor()); + } + + return new_item; +} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/utils/functions.h b/tensorflow/core/grappler/utils/functions.h new file mode 100644 index 0000000000..8f9b7d848a --- /dev/null +++ b/tensorflow/core/grappler/utils/functions.h @@ -0,0 +1,39 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_GRAPPLER_UTILS_FUNCTIONS_H_ +#define TENSORFLOW_GRAPPLER_UTILS_FUNCTIONS_H_ + +#include +#include +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/grappler/grappler_item.h" + +namespace tensorflow { + +namespace grappler { + +// Factory method for creating a GrapplerItem from a FunctionDef. +// Returns nullptr if the given function def cannot be converted. 
+std::unique_ptr GrapplerItemFromFunctionDef( + const FunctionDef& func, + const std::unordered_map& func_attr, + const FunctionDefLibrary& library); + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_GRAPPLER_UTILS_FUNCTIONS_H_ diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc new file mode 100644 index 0000000000..25ccb50084 --- /dev/null +++ b/tensorflow/core/grappler/utils/functions_test.cc @@ -0,0 +1,232 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/utils/functions.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/meta_graph.pb.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class FunctionsTest : public ::testing::Test {}; + +TEST_F(FunctionsTest, FromSimpleFunctionDef) { + const Tensor kTwo = test::AsScalar(2); + FunctionDef func = FunctionDefHelper::Define( + // Name + "XTimesTwo", + // Args + {"x: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"scale"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"y"}, "Mul", {"x", "scale"}, {{"T", "$T"}}}, + }); + + std::unordered_map func_attr; + func_attr["T"].set_type(DT_FLOAT); + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + CHECK(item); + EXPECT_EQ("XTimesTwo", item->id); + EXPECT_EQ(4, item->graph.node_size()); + EXPECT_EQ(std::vector({"y"}), item->fetch); + EXPECT_EQ(1, item->feed.size()); + EXPECT_EQ("x", item->feed[0].first); + + for (const NodeDef &node : item->graph.node()) { + if (node.name() == "x") { + EXPECT_EQ("Placeholder", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "two") { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "scale") { + EXPECT_EQ("Cast", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("DstT").type()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("two:0", node.input(0)); + } 
else if (node.name() == "y") { + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("scale:0", node.input(1)); + } + } +} + +TEST_F(FunctionsTest, FromFunctionDefWithMultiOutputNodes) { + // Gradient graph for the Subtract operation + std::vector nodes = { + {{"sx"}, "Shape", {"x"}}, + {{"sy"}, "Shape", {"y"}}, + {{"gx"}, "Identity", {"dz"}}, + {{"gy"}, "Neg", {"dz"}}, + {{"rx", "ry"}, "BroadcastGradientArgs", {"sx", "sy"}}, + {{"sum_gx"}, "Sum", {"gx", "rx"}}, + {{"dx"}, "Reshape", {"sum_gx", "sx"}}, + {{"sum_gy"}, "Sum", {"gy", "ry"}}, + {{"dy"}, "Reshape", {"sum_gy", "sy"}}, + }; + + for (auto &n : nodes) { + // "BroadcastGradientArgs" doesn't need any attrs. + if (n.attr.empty() && n.op != "BroadcastGradientArgs") { + n.attr = {{"T", "$T"}}; + } + } + FunctionDef func = FunctionDefHelper::Define( + // Name + "SubGrad", + // Arg defs + {"x: T", "y: T", "dz: T"}, + // Ret val defs + {"dx: T", "dy: T"}, + // Attr defs + {{"T: {half, float, double}"}}, + // Nodes + nodes); + + std::unordered_map func_attr; + func_attr["T"].set_type(DT_FLOAT); + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + CHECK(item); + EXPECT_EQ("SubGrad", item->id); + EXPECT_EQ(12, item->graph.node_size()); + EXPECT_EQ(std::vector({"dx", "dy"}), item->fetch); + EXPECT_EQ(3, item->feed.size()); + EXPECT_EQ("x", item->feed[0].first); + EXPECT_EQ("y", item->feed[1].first); + EXPECT_EQ("dz", item->feed[2].first); + + for (const NodeDef &node : item->graph.node()) { + if (node.name() == "x" || node.name() == "y" || node.name() == "dz") { + EXPECT_EQ("Placeholder", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "rx") { + EXPECT_EQ("BroadcastGradientArgs", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("sx:0", node.input(0)); + 
EXPECT_EQ("sy:0", node.input(1)); + } else if (node.name() == "sum_gx") { + EXPECT_EQ("Sum", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("gx:0", node.input(0)); + EXPECT_EQ("rx:0", node.input(1)); + } else if (node.name() == "sum_gy") { + EXPECT_EQ("Sum", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("gy:0", node.input(0)); + EXPECT_EQ("rx:1", node.input(1)); + } + } +} + +TEST_F(FunctionsTest, FromFunctionDefWithNestedFuncs) { + FunctionDefLibrary library; + *library.add_function() = FunctionDefHelper::Define( + // Name + "Swap", + // Args + {"i0: T", "i1: T"}, + // Return values + {"o0: T", "o1: T"}, + // Attr def + {"T: {float, double}"}, + // Nodes + {{{"o0"}, "Identity", {"i1"}, {{"T", "$T"}}}, + {{"o1"}, "Identity", {"i0"}, {{"T", "$T"}}}}); + + FunctionDef func = FunctionDefHelper::Create( + // Name + "ManySwapsFirst", + // Args + {"x: float", "y: float"}, + // Return values + {"o: float"}, + // attr def + {}, + // Nodes + // o = x*x + y*y. Furthermore, The 1st swap depends on x2, and + // y2 depends on the 2nd swap. The 2nd swap has data dependency + // on the 1st swap. 
+ {{{"a0"}, "Swap", {"x", "y"}, {{"T", DT_FLOAT}}, {"x2"}}, + {{"a1"}, "Swap", {"a0:o0:0", "a0:o1:0"}, {{"T", DT_FLOAT}}}, + {{"x2"}, "Mul", {"x", "x"}, {{"T", DT_FLOAT}}}, + {{"y2"}, "Mul", {"y", "y"}, {{"T", DT_FLOAT}}, {"a1"}}, + {{"o"}, "Add", {"x2:z:0", "y2:z:0"}, {{"T", DT_FLOAT}}}}, + {{"o", "o:z:0"}}); + + std::unordered_map func_attr; + func_attr["T"].set_type(DT_FLOAT); + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + + for (const NodeDef &node : item->graph.node()) { + if (node.name() == "x" || node.name() == "y") { + EXPECT_EQ("Placeholder", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "a0") { + EXPECT_EQ("Swap", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("^x2", node.input(2)); + } else if (node.name() == "a1") { + EXPECT_EQ("Swap", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("a0:0", node.input(0)); + EXPECT_EQ("a0:1", node.input(1)); + } else if (node.name() == "x2") { + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("x", node.input(1)); + } else if (node.name() == "y2") { + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("^a1", node.input(2)); + } else if (node.name() == "o") { + EXPECT_EQ("Add", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("x2:0", node.input(0)); + EXPECT_EQ("y2:0", node.input(1)); + } + } +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index fef8e97b6e..79b2aa2808 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -46,8 +46,8 @@ std::vector 
GrapplerTest::EvaluateFetchNodes(const GrapplerItem& item) { session->Run(run_options, {}, {}, item.init_ops, &dummy, nullptr)); } std::vector output_tensors; - TF_CHECK_OK( - session->Run(run_options, {}, item.fetch, {}, &output_tensors, nullptr)); + TF_CHECK_OK(session->Run(run_options, item.feed, item.fetch, {}, + &output_tensors, nullptr)); TF_CHECK_OK(session->Close()); return output_tensors; } -- GitLab From 1f18f757042e678cc935f645e9e5c21208ddc9ac Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 11:40:05 -0800 Subject: [PATCH 062/311] Don't crash on missing inputs in dependency analyzer. This is a temporary mitigation until the underlying bug is found. PiperOrigin-RevId: 187207594 --- tensorflow/core/grappler/optimizers/dependency_optimizer.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index edb0db65e9..b47cba5ff7 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -286,7 +286,10 @@ void DependencyOptimizer::OptimizeNode(int node_idx, std::vector input_nodes; for (int i = 0; i < num_inputs; ++i) { NodeDef* input_node = node_map_->GetNode(node->input(i)); - CHECK_NE(input_node, nullptr); + if (input_node == nullptr) { + LOG(ERROR) << "Invalid input " << node->input(i); + return; + } input_nodes.push_back(input_node); } -- GitLab From 207af365eb719fa7af3b56e1723fe3f67b0c4f0f Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 27 Feb 2018 11:48:25 -0800 Subject: [PATCH 063/311] [TF:XLA] Bump open source llvm revision to r326181 PiperOrigin-RevId: 187208788 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 5b09c5e67d..fa3671b4c9 100644 --- a/tensorflow/workspace.bzl +++ 
b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/832f2bf0d8908aea8160bab128708d521764fe8d.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/832f2bf0d8908aea8160bab128708d521764fe8d.tar.gz", ], - sha256 = "63d4da54dc7bc9a79e2ad266d230f4f759520cccb344a2dd49c2c6383ab75285", - strip_prefix = "llvm-8f7bcdf3c65b9a47e35653d525135beb18f3ac25", + sha256 = "e6bb793bbdce37ee5643789a27d174f1cdd8e7323a69d5f331376eb34755ee0d", + strip_prefix = "llvm-832f2bf0d8908aea8160bab128708d521764fe8d", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From d429fe193f4c235cde8223804ea888c2eaa5ce68 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 27 Feb 2018 11:57:09 -0800 Subject: [PATCH 064/311] Improve our handling of bitcasts. - Do not fuse bitcasts in the CPU backend. Fused instructions lose their layout and a bitcast is meaningless without a layout. We were explicitly testing for this so I've changed the corresponding tests to use a reshape instead. - Fail the layout assignment if we see a bitcast. bitcasts are inherently layout sensitive and so a bitcast instruction present in the IR before layout assignment is a bug. 
PiperOrigin-RevId: 187210151 --- .../xla/service/cpu/cpu_instruction_fusion.cc | 1 - .../cpu/cpu_instruction_fusion_test.cc | 29 +++++++++---------- .../compiler/xla/service/layout_assignment.cc | 7 +++++ .../xla/service/layout_assignment_test.cc | 21 ++++++++++++++ 4 files changed, 41 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc index 482e04052d..0fc5a746bb 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc @@ -30,7 +30,6 @@ bool CanBeLoopFused(const HloInstruction& hlo) { // These are the only ones we fuse since we rely on effective elemental IR // generation. return hlo.IsElementwise() || // - hlo.opcode() == HloOpcode::kBitcast || hlo.opcode() == HloOpcode::kBroadcast || hlo.opcode() == HloOpcode::kConcatenate || hlo.opcode() == HloOpcode::kDynamicSlice || diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc index 595c3f55b3..6ed1cd31b1 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc @@ -77,7 +77,7 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Basic_1) { EXPECT_THAT(computation->root_instruction(), op::Fusion()); } -TEST_F(InstructionFusionTest, DotOperationFusion_Bitcast) { +TEST_F(InstructionFusionTest, DotOperationNoFusion_Bitcast) { HloComputation::Builder builder(TestName()); HloInstruction* arg0 = builder.AddInstruction(HloInstruction::CreateParameter( 0, ShapeUtil::MakeShape(F32, {2, 512, 2, 128}), "arg0")); @@ -94,8 +94,7 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Bitcast) { auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); EXPECT_EQ(dot, computation->root_instruction()); - 
EXPECT_TRUE(CpuInstructionFusion().Run(module.get()).ValueOrDie()); - EXPECT_THAT(computation->root_instruction(), op::Fusion()); + EXPECT_FALSE(CpuInstructionFusion().Run(module.get()).ValueOrDie()); } TEST_F(InstructionFusionTest, DotOperationFusion_Reshape) { @@ -244,35 +243,33 @@ class OpcodeFusionTest : public InstructionFusionTest { } }; -TEST_F(OpcodeFusionTest, Exponential_Bitcast_Negate) { +TEST_F(OpcodeFusionTest, Exponential_Reshape_Negate) { HloComputation::Builder builder(TestName()); Shape param_shape = ShapeUtil::MakeShape(F32, {1, 4}); Shape result_shape = ShapeUtil::MakeShape(F32, {4}); HloInstruction* param0 = builder.AddInstruction( HloInstruction::CreateParameter(0, param_shape, "param")); - // InstructionFusion::ShouldFuse() precludes fusing a bitcast whose operand - // is a parameter, so create an operand between the parameter and bitcast. HloInstruction* exp1 = builder.AddInstruction( HloInstruction::CreateUnary(param_shape, HloOpcode::kExp, param0)); - HloInstruction* bitcast2 = builder.AddInstruction( - HloInstruction::CreateUnary(result_shape, HloOpcode::kBitcast, exp1)); + HloInstruction* reshape2 = + builder.AddInstruction(HloInstruction::CreateReshape(result_shape, exp1)); builder.AddInstruction( - HloInstruction::CreateUnary(result_shape, HloOpcode::kNegate, bitcast2)); + HloInstruction::CreateUnary(result_shape, HloOpcode::kNegate, reshape2)); auto module = CreateNewModule(); module->AddEntryComputation(builder.Build()); RunFusionAndCheckOpcodesWereFused( - module.get(), {HloOpcode::kNegate, HloOpcode::kBitcast, HloOpcode::kExp, + module.get(), {HloOpcode::kNegate, HloOpcode::kReshape, HloOpcode::kExp, HloOpcode::kParameter}); } -TEST_F(OpcodeFusionTest, Broadcast_Bitcast_DynamicSlice_Tanh) { +TEST_F(OpcodeFusionTest, Broadcast_Reshape_DynamicSlice_Tanh) { HloComputation::Builder builder(TestName()); Shape param_shape = ShapeUtil::MakeShape(F32, {8}); Shape starts_shape = ShapeUtil::MakeShape(F32, {2}); Shape broadcast_shape = 
ShapeUtil::MakeShape(F32, {1, 8, 8}); - Shape bitcast_shape = ShapeUtil::MakeShape(F32, {8, 8}); + Shape reshape_shape = ShapeUtil::MakeShape(F32, {8, 8}); Shape dynamic_slice_shape = ShapeUtil::MakeShape(F32, {4, 4}); HloInstruction* param0 = builder.AddInstruction( HloInstruction::CreateParameter(0, param_shape, "param")); @@ -280,11 +277,11 @@ TEST_F(OpcodeFusionTest, Broadcast_Bitcast_DynamicSlice_Tanh) { HloInstruction::CreateParameter(1, starts_shape, "starts")); HloInstruction* broadcast2 = builder.AddInstruction( HloInstruction::CreateBroadcast(broadcast_shape, param0, {1})); - HloInstruction* bitcast3 = builder.AddInstruction(HloInstruction::CreateUnary( - bitcast_shape, HloOpcode::kBitcast, broadcast2)); + HloInstruction* reshape3 = builder.AddInstruction( + HloInstruction::CreateReshape(reshape_shape, broadcast2)); HloInstruction* dynamic_slice4 = builder.AddInstruction(HloInstruction::CreateDynamicSlice( - dynamic_slice_shape, bitcast3, param1, {4, 4})); + dynamic_slice_shape, reshape3, param1, {4, 4})); builder.AddInstruction(HloInstruction::CreateUnary( dynamic_slice_shape, HloOpcode::kTanh, dynamic_slice4)); @@ -293,7 +290,7 @@ TEST_F(OpcodeFusionTest, Broadcast_Bitcast_DynamicSlice_Tanh) { RunFusionAndCheckOpcodesWereFused( module.get(), - {HloOpcode::kTanh, HloOpcode::kDynamicSlice, HloOpcode::kBitcast, + {HloOpcode::kTanh, HloOpcode::kDynamicSlice, HloOpcode::kReshape, HloOpcode::kBroadcast, HloOpcode::kParameter, HloOpcode::kParameter}); } diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 4929300f7d..39f9120e55 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -1561,6 +1561,13 @@ StatusOr LayoutAssignment::Run(HloModule* module) { // infeeds. Clearing the layouts here avoids hiding potential bugs in the // layout assignment pass that may accidently use the existing layout. 
for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kBitcast) { + // bitcasts are inherently layout sensitive and so a bitcast instruction + // present in the IR before layout assignment is a bug. + return InternalError( + "Unexpected bitcast operation seen during layout assignment: %s.", + instruction->ToString().c_str()); + } if (instruction->opcode() != HloOpcode::kInfeed) { LayoutUtil::ClearLayout(instruction->mutable_shape()); } diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 62feb7c1e9..4b1c9bad41 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -796,5 +796,26 @@ TEST_F(LayoutAssignmentTest, ConditionalAsymmetricLayout) { EXPECT_THAT(false_result->opcode(), HloOpcode::kCopy); } +TEST_F(LayoutAssignmentTest, InternalErrorOnBitcast) { + auto builder = HloComputation::Builder(TestName()); + auto constant0 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); + builder.AddInstruction(HloInstruction::CreateUnary( + constant0->shape(), HloOpcode::kBitcast, constant0)); + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + ComputationLayout computation_layout( + module->entry_computation()->ComputeProgramShape()); + LayoutAssignment layout_assignment(&computation_layout); + Status error_status = layout_assignment.Run(module.get()).status(); + EXPECT_FALSE(error_status.ok()); + EXPECT_THAT( + error_status.error_message(), + ::testing::HasSubstr( + "Unexpected bitcast operation seen during layout assignment")); +} + } // namespace } // namespace xla -- GitLab From e504797de0b1112caea5080c3ab2060156c4e8a1 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 27 Feb 2018 12:05:41 -0800 Subject: [PATCH 065/311] Use 
a couple of type aliases for brevity; NFC PiperOrigin-RevId: 187211560 --- .../compiler/xla/service/hlo_evaluator.cc | 133 ++++++++---------- 1 file changed, 62 insertions(+), 71 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index afbfdac05e..8c7459099d 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -51,6 +51,10 @@ namespace xla { namespace { +using tensorflow::gtl::ArraySlice; +using tensorflow::gtl::FlatSet; +using tensorflow::gtl::optional; + template struct is_complex_t : public std::false_type {}; @@ -105,11 +109,10 @@ StatusOr> Compare(const Shape& shape, HloOpcode opcode, } auto result = Literal::CreateFromShape(shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { - return compare_op(lhs_literal.Get(multi_index), - rhs_literal.Get(multi_index)); - })); + TF_RETURN_IF_ERROR(result->Populate([&](ArraySlice multi_index) { + return compare_op(lhs_literal.Get(multi_index), + rhs_literal.Get(multi_index)); + })); return std::move(result); } @@ -136,11 +139,10 @@ StatusOr> Compare( } auto result = Literal::CreateFromShape(shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { - return compare_op(lhs_literal.Get(multi_index), - rhs_literal.Get(multi_index)); - })); + TF_RETURN_IF_ERROR(result->Populate([&](ArraySlice multi_index) { + return compare_op(lhs_literal.Get(multi_index), + rhs_literal.Get(multi_index)); + })); return std::move(result); } @@ -165,8 +167,8 @@ StatusOr> ElementWiseUnaryOpImpl( auto result = Literal::CreateFromShape(shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { return unary_op(operand_literal.Get(multi_index)); })); return std::move(result); @@ -178,7 +180,7 @@ StatusOr> 
ElementWiseUnaryOpImpl( // with the base index. void IterateThroughWindow( const Shape& window_shape, const Window& window, const Shape& base_shape, - const tensorflow::gtl::ArraySlice& window_count_index, + const ArraySlice& window_count_index, const std::function&)>& f) { const int64 rank = ShapeUtil::Rank(base_shape); DimensionVector window_index(rank); @@ -332,13 +334,12 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { operand_to_broadcast.shape().dimensions(i)); } - return output->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { - for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { - broadcast_indices[i] = multi_index[broadcast->dimensions(i)]; - } - return operand_to_broadcast.Get(broadcast_indices); - }); + return output->Populate([&](ArraySlice multi_index) { + for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { + broadcast_indices[i] = multi_index[broadcast->dimensions(i)]; + } + return operand_to_broadcast.Get(broadcast_indices); + }); } template < @@ -902,8 +903,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); auto result = Literal::CreateFromShape(result_shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice out_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice out_index) { std::vector from_index(out_index.begin(), out_index.end()); for (const int64 dim : reverse_dimensions) { from_index[dim] = result_shape.dimensions(dim) - 1 - out_index[dim]; @@ -978,7 +979,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { DimensionVector rhs_index(rhs_rank); DimensionVector rhs_spatial_index(dnums.kernel_spatial_dimensions_size()); - auto func = [&](tensorflow::gtl::ArraySlice out_index) { + auto func = [&](ArraySlice out_index) { ElementwiseT result_val = static_cast(0); std::fill(lhs_index.begin(), lhs_index.end(), 0); @@ -1100,9 +1101,8 @@ class 
HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { } std::vector rhs_non_batch_non_contracting_dims; - tensorflow::gtl::FlatSet batch_dims_set( - dnums.rhs_batch_dimensions().begin(), - dnums.rhs_batch_dimensions().end()); + FlatSet batch_dims_set(dnums.rhs_batch_dimensions().begin(), + dnums.rhs_batch_dimensions().end()); for (int64 i = 0; i < rhs_rank; i++) { if (i != rhs_contracting_dimension && batch_dims_set.count(i) == 0) { rhs_non_batch_non_contracting_dims.push_back(i); @@ -1114,8 +1114,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { DimensionVector lhs_index(lhs_rank); DimensionVector rhs_index(rhs_rank); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice result_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice result_index) { ElementwiseT result_val = static_cast(0); // Find the corresponding non-contracting indices for lhs and rhs. @@ -1209,9 +1209,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { parent_->GetEvaluatedLiteralFor(pad->operand(1)).Get({}); auto result = Literal::CreateFromShape(pad->shape()); TF_RETURN_IF_ERROR(result->Populate( - [&scalar](tensorflow::gtl::ArraySlice multi_index) { - return scalar; - })); + [&scalar](ArraySlice multi_index) { return scalar; })); const Literal& evaluated_operand = parent_->GetEvaluatedLiteralFor(pad->operand(0)); @@ -1375,8 +1373,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { auto result = Literal::CreateFromShape(map->shape()); HloEvaluator embedded_evaluator; - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { std::vector> arg_literals; arg_literals.reserve(operands.size()); @@ -1466,7 +1464,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { Status HandleReduce(HloInstruction* reduce) override { auto arg = reduce->operand(0); auto init_value = 
reduce->operand(1); - tensorflow::gtl::ArraySlice dimensions(reduce->dimensions()); + ArraySlice dimensions(reduce->dimensions()); HloComputation* function = reduce->to_apply(); TF_RET_CHECK(ShapeUtil::Rank(reduce->shape()) == ShapeUtil::Rank(arg->shape()) - dimensions.size()); @@ -1511,8 +1509,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { HloEvaluator embedded_evaluator; // For each resulting dimension, calculate and assign computed value. - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { ReturnT result_val = init_scalar; std::vector base(arg_dimensions.size()); @@ -1566,9 +1564,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { // Initialize result array with the init value. TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice output_index) { - return init_scalar; - })); + [&](ArraySlice output_index) { return init_scalar; })); std::vector window_dimension_sizes; for (const auto& window_dimension : window.dimensions()) { @@ -1601,8 +1597,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { // 2. Using the selected index, scatter value from `source` to result. We // do this by iterating through the window, and compare each index with // the selected index. - tensorflow::gtl::optional selected_val; - tensorflow::gtl::optional> selected_index; + optional selected_val; + optional> selected_index; IterateThroughWindow( window_shape, window, operand_literal.shape(), source_index, @@ -1698,8 +1694,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { HloEvaluator embedded_evaluator; // For each resulting dimension, calculate and assign computed value. 
- TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice output_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice output_index) { ReturnT result_val = init_scalar; std::fill(window_index.begin(), window_index.end(), 0); @@ -1749,7 +1745,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { const int64 rank = ShapeUtil::Rank(operand->shape()); const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); - auto func = [&](tensorflow::gtl::ArraySlice out_index) { + auto func = [&](ArraySlice out_index) { DimensionVector operand_index(rank); for (int64 i = 0; i < rank; ++i) { operand_index[i] = @@ -1930,8 +1926,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { std::vector operand_indices(start.size()); auto result = Literal::CreateFromShape(result_shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { for (int64 i = 0; i < operand_indices.size(); ++i) { CHECK_GE(multi_index[i] + start[i], 0); // Mod is only used here to be consistent with the existing @@ -2014,8 +2010,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { auto result = Literal::CreateFromShape(shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { return ConvertBinaryFunction(binary_op)( lhs_literal.Get(multi_index), rhs_literal.Get(multi_index)); @@ -2052,8 +2048,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { auto result = Literal::CreateFromShape(shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { return ternary_op(lhs_literal.Get(multi_index), rhs_literal.Get(multi_index), ehs_literal.Get(multi_index)); @@ -2107,8 +2103,7 @@ 
HloEvaluator::HloEvaluator() { template StatusOr> HloEvaluator::Evaluate( - const HloModule& module, - tensorflow::gtl::ArraySlice arg_literals) { + const HloModule& module, ArraySlice arg_literals) { XLA_VLOG_LINES(2, "HloEvaluator::Evaluate module:\n" + module.ToString()); evaluated_.clear(); @@ -2125,8 +2120,7 @@ StatusOr> HloEvaluator::Evaluate( template StatusOr> HloEvaluator::Evaluate( - const HloComputation& computation, - tensorflow::gtl::ArraySlice arg_literals) { + const HloComputation& computation, ArraySlice arg_literals) { XLA_VLOG_LINES( 2, "HloEvaluator::Evaluate computation:\n" + computation.ToString()); @@ -2142,8 +2136,7 @@ StatusOr> HloEvaluator::Evaluate( template StatusOr> HloEvaluator::Evaluate( - HloInstruction* instruction, - tensorflow::gtl::ArraySlice arg_literals) { + HloInstruction* instruction, ArraySlice arg_literals) { TF_RET_CHECK(hlo_query::AllOperandsAreParametersOrConstants(*instruction)); TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(instruction->shape())); @@ -2268,8 +2261,7 @@ Status HloEvaluator::HandleTranspose(HloInstruction* transpose) { } Status HloEvaluator::HandleConcatenate(HloInstruction* concatenate) { - tensorflow::gtl::ArraySlice operands( - concatenate->operands()); + ArraySlice operands(concatenate->operands()); // The result concatenate dimension is going to be the sum of all // concatenate dimensions of the operands taking part of the operation. const Shape& reference_shape = operands[0]->shape(); @@ -2532,28 +2524,27 @@ Status HloEvaluator::Postprocess(HloInstruction* hlo) { // Explicit instantiation of templatized Evaluate* methods. 
// -template StatusOr> HloEvaluator::Evaluate< - const Literal*>(const HloModule& module, - tensorflow::gtl::ArraySlice arg_literals); +template StatusOr> +HloEvaluator::Evaluate(const HloModule& module, + ArraySlice arg_literals); template StatusOr> HloEvaluator::Evaluate>( - const HloModule& module, - tensorflow::gtl::ArraySlice> arg_literals); + const HloModule& module, ArraySlice> arg_literals); -template StatusOr> HloEvaluator::Evaluate< - const Literal*>(const HloComputation& computation, - tensorflow::gtl::ArraySlice arg_literals); +template StatusOr> +HloEvaluator::Evaluate(const HloComputation& computation, + ArraySlice arg_literals); template StatusOr> HloEvaluator::Evaluate>( const HloComputation& computation, - tensorflow::gtl::ArraySlice> arg_literals); + ArraySlice> arg_literals); -template StatusOr> HloEvaluator::Evaluate< - const Literal*>(HloInstruction* instruction, - tensorflow::gtl::ArraySlice arg_literals); +template StatusOr> +HloEvaluator::Evaluate(HloInstruction* instruction, + ArraySlice arg_literals); template StatusOr> HloEvaluator::Evaluate>( HloInstruction* instruction, - tensorflow::gtl::ArraySlice> arg_literals); + ArraySlice> arg_literals); } // namespace xla -- GitLab From 691f1e6de0ce628ed11406bd6fd2f599763bb7cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 12:06:33 -0800 Subject: [PATCH 066/311] Add consistency check: for constant arrays (those that have a buffer), there must be a shape, and its flat-size must equal the buffer length. 
PiperOrigin-RevId: 187211685 --- .../contrib/lite/toco/import_tensorflow.cc | 37 +++++++++++++++++++ tensorflow/contrib/lite/toco/model.h | 4 ++ tensorflow/contrib/lite/toco/tflite/import.cc | 3 ++ tensorflow/contrib/lite/toco/tooling_util.cc | 10 ++++- 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 27d2f33a8d..52a0512e23 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -272,6 +272,39 @@ void ImportInt64Array(const TensorProto& input_tensor, Array* output_array) { } } +void ImportBoolArray(const TensorProto& input_tensor, Array* output_array) { + CHECK_EQ(input_tensor.dtype(), DT_BOOL); + const auto& input_shape = input_tensor.tensor_shape(); + CHECK_LE(input_shape.dim_size(), 4); + ImportShape(input_shape.dim(), output_array->mutable_shape()); + int input_flat_size = 1; + for (int k = 0; k < input_shape.dim_size(); k++) { + input_flat_size *= input_shape.dim(k).size(); + } + auto& output_bool_data = + output_array->GetMutableBuffer().data; + output_bool_data.resize(RequiredBufferSizeForShape(output_array->shape()), + false); + if (input_tensor.bool_val_size()) { + for (int i = 0; i < input_tensor.bool_val_size(); i++) { + output_bool_data[i] = input_tensor.bool_val(i); + } + } else if (input_tensor.tensor_content().size() == input_flat_size) { + std::vector buf(input_tensor.tensor_content().size()); + toco::port::CopyToBuffer(input_tensor.tensor_content(), buf.data()); + for (int i = 0; i < input_tensor.tensor_content().size(); i++) { + output_bool_data[i] = static_cast(buf[i]); + } + } else { + // Some graphs have bool const nodes without actual value... + // assuming that 'false' is implied. + // So far only encountered that in an array with 1 entry, let's + // require that until we encounter a graph where that's not the case. 
+ CHECK_EQ(output_bool_data.size(), 1); + output_bool_data[0] = false; + } +} + void ImportStringArray(const TensorProto& input_tensor, Array* output_array) { CHECK_EQ(input_tensor.dtype(), DT_STRING); const auto& input_shape = input_tensor.tensor_shape(); @@ -347,6 +380,10 @@ void ConvertConstOperator(const NodeDef& node, array.data_type = ArrayDataType::kString; ImportStringArray(tensor, &array); break; + case DT_BOOL: + array.data_type = ArrayDataType::kBool; + ImportBoolArray(tensor, &array); + break; default: array.data_type = ArrayDataType::kNone; // do nothing, silently ignore the Const data. diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 346859ab39..d5df0fb951 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -244,6 +244,8 @@ struct GenericBuffer { // in containers and have the containers call the right subclass destructor. virtual ~GenericBuffer() {} + virtual int Length() const = 0; + const ArrayDataType type; protected: @@ -256,6 +258,8 @@ template struct Buffer : GenericBuffer { Buffer() : GenericBuffer(A) {} + int Length() const override { return data.size(); } + std::vector> data; }; diff --git a/tensorflow/contrib/lite/toco/tflite/import.cc b/tensorflow/contrib/lite/toco/tflite/import.cc index 5b1ab514b2..d2aeb78114 100644 --- a/tensorflow/contrib/lite/toco/tflite/import.cc +++ b/tensorflow/contrib/lite/toco/tflite/import.cc @@ -64,6 +64,9 @@ void ImportTensors(const ::tflite::Model& input_model, Model* model) { auto shape = input_tensor->shape(); if (shape) { + // If the shape is 0-dimensional, make sure to record it as such, + // as opposed to leaving the array without a shape.
+ array.mutable_shape()->mutable_dims()->clear(); for (int i = 0; i < shape->Length(); ++i) { auto d = shape->Get(i); array.mutable_shape()->mutable_dims()->push_back(d); diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 9e72582238..1ab7b34331 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -84,6 +84,8 @@ string ArrayDataTypeName(ArrayDataType data_type) { return "Uint64"; case ArrayDataType::kString: return "String"; + case ArrayDataType::kBool: + return "Bool"; case ArrayDataType::kNone: return "None"; default: @@ -809,9 +811,15 @@ void CheckEachArray(const Model& model) { // It's OK to have a buffer or an alloc, but not both. // (Since allocs are for transient arrays without a buffer). CHECK(!array->buffer || !array->alloc); - // If there is a buffer, its type should be consistent with data_type. if (array->buffer) { + // If there is a buffer, its type should be consistent with data_type. CHECK(array->buffer->type == array->data_type); + // The presence of a fixed buffer should imply the presence of a fixed + // shape. + CHECK(array->has_shape()); + // The shape flat-size should agree with the buffer length. + CHECK_EQ(array->buffer->Length(), + RequiredBufferSizeForShape(array->shape())); } // Check name. Either "name_with_suffix_8", "name_with_port:3", but not -- GitLab From f97d233e79aa7d88057c8b8b355eda6cb3bfea07 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 27 Feb 2018 12:08:24 -0800 Subject: [PATCH 067/311] Register the function optimizer in the meta optimizer. Made sure it's turned OFF by default until more validation is done. 
PiperOrigin-RevId: 187211957 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../core/grappler/optimizers/function_optimizer.cc | 5 +++++ .../core/grappler/optimizers/function_optimizer_test.cc | 7 +++++++ tensorflow/core/grappler/optimizers/meta_optimizer.cc | 9 +++++++++ tensorflow/core/grappler/utils/BUILD | 1 - tensorflow/core/protobuf/rewriter_config.proto | 2 ++ 6 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index bd41854c41..7b801db2c8 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -455,6 +455,7 @@ cc_library( ":custom_graph_optimizer", ":custom_graph_optimizer_registry", ":dependency_optimizer", + ":function_optimizer", ":graph_optimizer", ":layout_optimizer", ":loop_optimizer", diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index efc4f2f4bd..3c96ff869b 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -45,6 +45,7 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, NodeDef* func_inputs = graph->add_node(); func_inputs->set_name(strings::StrCat(node.name(), "/", "inlined_inputs")); func_inputs->set_op("IdentityN"); + func_inputs->set_device(node.device()); *func_inputs->mutable_input() = node.input(); AttrValue::ListValue* type_list = (*func_inputs->mutable_attr())["T"].mutable_list(); @@ -79,6 +80,9 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, func_body_node.set_name( strings::StrCat(node.name(), "/", func_body_node.name())); + // Make sure the node is placed + func_body_node.set_device(node.device()); + // Move the node to the main graph graph->add_node()->Swap(&func_body_node); } @@ -88,6 +92,7 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, NodeDef* 
func_outputs = graph->add_node(); func_outputs->set_name(node.name()); func_outputs->set_op("IdentityN"); + func_outputs->set_device(node.device()); type_list = (*func_outputs->mutable_attr())["T"].mutable_list(); for (const OpDef::ArgDef& arg : func.signature().output_arg()) { auto it = attr.find(arg.type_attr()); diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index b8e05a5296..76a5c08d35 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -50,33 +50,40 @@ TEST_F(FunctionOptimizerTest, SimpleFunction) { if (node.name() == "y/inlined_inputs") { count++; EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("x", node.input(0)); } else if (node.name() == "y/x") { count++; EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("y/inlined_inputs:0", node.input(0)); } else if (node.name() == "y/two") { count++; EXPECT_EQ("Const", node.op()); + EXPECT_EQ(device, node.device()); } else if (node.name() == "y/scale") { count++; EXPECT_EQ("Cast", node.op()); + EXPECT_EQ(device, node.device()); } else if (node.name() == "y/y") { count++; EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(device, node.device()); EXPECT_EQ(2, node.input_size()); EXPECT_EQ("y/x", node.input(0)); EXPECT_EQ("y/scale:0", node.input(1)); } else if (node.name() == "y") { count++; EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("y/y", node.input(0)); } else if (node.name() == "z") { count++; EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("y", node.input(0)); } diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc 
b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 7ae77207af..93658a6475 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h" +#include "tensorflow/core/grappler/optimizers/function_optimizer.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/optimizers/layout_optimizer.h" #include "tensorflow/core/grappler/optimizers/loop_optimizer.h" @@ -56,6 +57,9 @@ std::unique_ptr MetaOptimizer::NewOptimizer( if (optimizer == "pruning") { graph_optimizer.reset(new ModelPruner()); } + if (optimizer == "function") { + graph_optimizer.reset(new FunctionOptimizer()); + } if (optimizer == "constfold") { graph_optimizer.reset(new ConstantFolding(cpu_device_)); } @@ -90,6 +94,10 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, if (!cfg_.disable_model_pruning()) { optimizers.push_back(std::unique_ptr(new ModelPruner())); } + if (cfg_.function_optimization() == RewriterConfig::ON) { + optimizers.push_back( + std::unique_ptr(new FunctionOptimizer())); + } if (cfg_.constant_folding() != RewriterConfig::OFF) { optimizers.push_back(std::unique_ptr( new ConstantFolding(cfg_.constant_folding(), cpu_device_))); @@ -223,6 +231,7 @@ void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item, bool MetaOptimizerEnabled(const RewriterConfig& cfg) { return !cfg.disable_model_pruning() || cfg.layout_optimizer() != RewriterConfig::OFF || + cfg.function_optimization() == RewriterConfig::ON || cfg.constant_folding() != RewriterConfig::OFF || cfg.dependency_optimization() != RewriterConfig::OFF || cfg.loop_optimization() == RewriterConfig::ON || diff --git 
a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index fc05713494..3dbad40cae 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -155,7 +155,6 @@ cc_library( hdrs = ["functions.h"], visibility = ["//visibility:public"], deps = [ - "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 875e4663db..9ebf217811 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -44,6 +44,8 @@ message RewriterConfig { Toggle dependency_optimization = 8; // Loop optimizations (default is OFF). Toggle loop_optimization = 9; + // Function optimizations (default is OFF). + Toggle function_optimization = 10; // If true, don't remove unnecessary ops from the graph bool disable_model_pruning = 2; -- GitLab From 24a1c89187e49847fbd3575d626f1e374ce9ed18 Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Tue, 27 Feb 2018 12:12:32 -0800 Subject: [PATCH 068/311] Allow eager metrics to save internal variables by using global_variables. PiperOrigin-RevId: 187212528 --- .../contrib/eager/python/metrics_impl.py | 20 +++++++++++++------ .../contrib/eager/python/metrics_test.py | 13 ++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py index ea8dbf2b46..5571e77c70 100644 --- a/tensorflow/contrib/eager/python/metrics_impl.py +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -93,11 +93,12 @@ class Metric(object): `aggregate()`, it is for use by TensorFlow infrastructure. 
""" - def __init__(self, name=None): + def __init__(self, name=None, use_global_variables=False): self._built = False self._vars = [] self._initial_values = {} self._updates = [] + self._use_global_variables = use_global_variables name = name or self.__class__.__name__ # Replace things like spaces in name to create a valid scope name. scope_name = _to_replace.sub("_", name) @@ -245,9 +246,14 @@ class Metric(object): """***Only for use by descendants of Metric***.""" if self._built: raise RuntimeError("Can't call add_variable() except in build().") - collections = None if context.in_eager_mode() else [ - ops.GraphKeys.LOCAL_VARIABLES, ops.GraphKeys.METRIC_VARIABLES - ] + if context.in_eager_mode(): + collections = None + else: + if self._use_global_variables: + collections = [ops.GraphKeys.GLOBAL_VARIABLES] + else: + collections = [ops.GraphKeys.LOCAL_VARIABLES] + collections += [ops.GraphKeys.METRIC_VARIABLES] v = variable_scope.get_variable( name, shape, @@ -267,8 +273,10 @@ class Mean(Metric): # TODO(josh11b): Maybe have a dtype argument that defaults to tf.float64? # Or defaults to type of the input if it is tf.float32, else tf.float64? 
- def __init__(self, name=None, dtype=dtypes.float64): - super(Mean, self).__init__(name=name) + def __init__(self, name=None, dtype=dtypes.float64, + use_global_variables=False): + super(Mean, self).__init__(name=name, + use_global_variables=use_global_variables) self.dtype = dtype def build(self, *args, **kwargs): diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py index a9ecaa3f8b..c9106294dc 100644 --- a/tensorflow/contrib/eager/python/metrics_test.py +++ b/tensorflow/contrib/eager/python/metrics_test.py @@ -50,6 +50,19 @@ class MetricsTest(test.TestCase): self.assertEqual( set(m.variables), set(ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES))) + self.assertEqual(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES), []) + self.assertEqual( + set(m.variables), + set(ops.get_collection(ops.GraphKeys.METRIC_VARIABLES))) + + def testUseGlobalVariablesCollections(self): + with context.graph_mode(), ops.Graph().as_default(): + m = metrics.Mean(use_global_variables=True) + m(1000) + self.assertEqual( + set(m.variables), + set(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + self.assertEqual(ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES), []) self.assertEqual( set(m.variables), set(ops.get_collection(ops.GraphKeys.METRIC_VARIABLES))) -- GitLab From 78376e4077f4e9d293811bdbc453c6d1b93db453 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 27 Feb 2018 12:34:17 -0800 Subject: [PATCH 069/311] Make Layers Checkpointable (This change is mostly API goldens by volume) Layers will inherit from CheckpointableBase since they do variable management themselves. A __setattr__ override would also likely slow down functional layers significantly. I believe the plan for Model is to piggyback on its existing __setattr__ override rather than having Model inherit from CheckpointableBase through Layer and Checkpointable itself. 
PiperOrigin-RevId: 187215512 --- .../eager/python/checkpointable_utils_test.py | 32 ++++--------------- tensorflow/python/layers/base.py | 21 +++++++----- tensorflow/python/training/checkpointable.py | 16 +++++++--- .../api/golden/tensorflow.keras.-model.pbtxt | 1 + .../golden/tensorflow.keras.-sequential.pbtxt | 1 + .../tensorflow.keras.layers.-activation.pbtxt | 1 + ...eras.layers.-activity-regularization.pbtxt | 1 + .../golden/tensorflow.keras.layers.-add.pbtxt | 1 + ...nsorflow.keras.layers.-alpha-dropout.pbtxt | 1 + ...low.keras.layers.-average-pooling1-d.pbtxt | 1 + ...low.keras.layers.-average-pooling2-d.pbtxt | 1 + ...low.keras.layers.-average-pooling3-d.pbtxt | 1 + .../tensorflow.keras.layers.-average.pbtxt | 1 + ...tensorflow.keras.layers.-avg-pool1-d.pbtxt | 1 + ...tensorflow.keras.layers.-avg-pool2-d.pbtxt | 1 + ...tensorflow.keras.layers.-avg-pool3-d.pbtxt | 1 + ...ow.keras.layers.-batch-normalization.pbtxt | 1 + ...nsorflow.keras.layers.-bidirectional.pbtxt | 1 + ...tensorflow.keras.layers.-concatenate.pbtxt | 1 + ...orflow.keras.layers.-conv-l-s-t-m2-d.pbtxt | 1 + .../tensorflow.keras.layers.-conv1-d.pbtxt | 1 + ...flow.keras.layers.-conv2-d-transpose.pbtxt | 1 + .../tensorflow.keras.layers.-conv2-d.pbtxt | 1 + ...flow.keras.layers.-conv3-d-transpose.pbtxt | 1 + .../tensorflow.keras.layers.-conv3-d.pbtxt | 1 + ...sorflow.keras.layers.-convolution1-d.pbtxt | 1 + ...ras.layers.-convolution2-d-transpose.pbtxt | 1 + ...sorflow.keras.layers.-convolution2-d.pbtxt | 1 + ...ras.layers.-convolution3-d-transpose.pbtxt | 1 + ...sorflow.keras.layers.-convolution3-d.pbtxt | 1 + ...tensorflow.keras.layers.-cropping1-d.pbtxt | 1 + ...tensorflow.keras.layers.-cropping2-d.pbtxt | 1 + ...tensorflow.keras.layers.-cropping3-d.pbtxt | 1 + .../tensorflow.keras.layers.-dense.pbtxt | 1 + .../golden/tensorflow.keras.layers.-dot.pbtxt | 1 + .../tensorflow.keras.layers.-dropout.pbtxt | 1 + .../tensorflow.keras.layers.-e-l-u.pbtxt | 1 + 
.../tensorflow.keras.layers.-embedding.pbtxt | 1 + .../tensorflow.keras.layers.-flatten.pbtxt | 1 + .../tensorflow.keras.layers.-g-r-u-cell.pbtxt | 1 + .../tensorflow.keras.layers.-g-r-u.pbtxt | 1 + ...rflow.keras.layers.-gaussian-dropout.pbtxt | 1 + ...sorflow.keras.layers.-gaussian-noise.pbtxt | 1 + ...as.layers.-global-average-pooling1-d.pbtxt | 1 + ...as.layers.-global-average-pooling2-d.pbtxt | 1 + ...as.layers.-global-average-pooling3-d.pbtxt | 1 + ...low.keras.layers.-global-avg-pool1-d.pbtxt | 1 + ...low.keras.layers.-global-avg-pool2-d.pbtxt | 1 + ...low.keras.layers.-global-avg-pool3-d.pbtxt | 1 + ...low.keras.layers.-global-max-pool1-d.pbtxt | 1 + ...low.keras.layers.-global-max-pool2-d.pbtxt | 1 + ...low.keras.layers.-global-max-pool3-d.pbtxt | 1 + ....keras.layers.-global-max-pooling1-d.pbtxt | 1 + ....keras.layers.-global-max-pooling2-d.pbtxt | 1 + ....keras.layers.-global-max-pooling3-d.pbtxt | 1 + ...tensorflow.keras.layers.-input-layer.pbtxt | 1 + ...ensorflow.keras.layers.-l-s-t-m-cell.pbtxt | 1 + .../tensorflow.keras.layers.-l-s-t-m.pbtxt | 1 + .../tensorflow.keras.layers.-lambda.pbtxt | 1 + .../tensorflow.keras.layers.-layer.pbtxt | 1 + ...ensorflow.keras.layers.-leaky-re-l-u.pbtxt | 1 + ...w.keras.layers.-locally-connected1-d.pbtxt | 1 + ...w.keras.layers.-locally-connected2-d.pbtxt | 1 + .../tensorflow.keras.layers.-masking.pbtxt | 1 + ...tensorflow.keras.layers.-max-pool1-d.pbtxt | 1 + ...tensorflow.keras.layers.-max-pool2-d.pbtxt | 1 + ...tensorflow.keras.layers.-max-pool3-d.pbtxt | 1 + ...sorflow.keras.layers.-max-pooling1-d.pbtxt | 1 + ...sorflow.keras.layers.-max-pooling2-d.pbtxt | 1 + ...sorflow.keras.layers.-max-pooling3-d.pbtxt | 1 + .../tensorflow.keras.layers.-maximum.pbtxt | 1 + .../tensorflow.keras.layers.-multiply.pbtxt | 1 + .../tensorflow.keras.layers.-p-re-l-u.pbtxt | 1 + .../tensorflow.keras.layers.-permute.pbtxt | 1 + .../tensorflow.keras.layers.-r-n-n.pbtxt | 1 + ...nsorflow.keras.layers.-repeat-vector.pbtxt | 1 + 
.../tensorflow.keras.layers.-reshape.pbtxt | 1 + ...flow.keras.layers.-separable-conv1-d.pbtxt | 1 + ...flow.keras.layers.-separable-conv2-d.pbtxt | 1 + ...ras.layers.-separable-convolution1-d.pbtxt | 1 + ...ras.layers.-separable-convolution2-d.pbtxt | 1 + ...flow.keras.layers.-simple-r-n-n-cell.pbtxt | 1 + ...ensorflow.keras.layers.-simple-r-n-n.pbtxt | 1 + .../tensorflow.keras.layers.-softmax.pbtxt | 1 + ...low.keras.layers.-spatial-dropout1-d.pbtxt | 1 + ...low.keras.layers.-spatial-dropout2-d.pbtxt | 1 + ...low.keras.layers.-spatial-dropout3-d.pbtxt | 1 + ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt | 1 + ...low.keras.layers.-thresholded-re-l-u.pbtxt | 1 + ...rflow.keras.layers.-time-distributed.pbtxt | 1 + ...sorflow.keras.layers.-up-sampling1-d.pbtxt | 1 + ...sorflow.keras.layers.-up-sampling2-d.pbtxt | 1 + ...sorflow.keras.layers.-up-sampling3-d.pbtxt | 1 + .../tensorflow.keras.layers.-wrapper.pbtxt | 1 + ...orflow.keras.layers.-zero-padding1-d.pbtxt | 1 + ...orflow.keras.layers.-zero-padding2-d.pbtxt | 1 + ...orflow.keras.layers.-zero-padding3-d.pbtxt | 1 + .../tensorflow.keras.models.-model.pbtxt | 1 + .../tensorflow.keras.models.-sequential.pbtxt | 1 + ...ensorflow.layers.-average-pooling1-d.pbtxt | 1 + ...ensorflow.layers.-average-pooling2-d.pbtxt | 1 + ...ensorflow.layers.-average-pooling3-d.pbtxt | 1 + ...nsorflow.layers.-batch-normalization.pbtxt | 1 + .../golden/tensorflow.layers.-conv1-d.pbtxt | 1 + ...tensorflow.layers.-conv2-d-transpose.pbtxt | 1 + .../golden/tensorflow.layers.-conv2-d.pbtxt | 1 + ...tensorflow.layers.-conv3-d-transpose.pbtxt | 1 + .../golden/tensorflow.layers.-conv3-d.pbtxt | 1 + .../api/golden/tensorflow.layers.-dense.pbtxt | 1 + .../golden/tensorflow.layers.-dropout.pbtxt | 1 + .../golden/tensorflow.layers.-flatten.pbtxt | 1 + .../api/golden/tensorflow.layers.-layer.pbtxt | 1 + .../tensorflow.layers.-max-pooling1-d.pbtxt | 1 + .../tensorflow.layers.-max-pooling2-d.pbtxt | 1 + .../tensorflow.layers.-max-pooling3-d.pbtxt | 1 + 
...tensorflow.layers.-separable-conv1-d.pbtxt | 1 + ...tensorflow.layers.-separable-conv2-d.pbtxt | 1 + ...flow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt | 1 + ...orflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt | 1 + ...nsorflow.nn.rnn_cell.-device-wrapper.pbtxt | 1 + ...sorflow.nn.rnn_cell.-dropout-wrapper.pbtxt | 1 + .../tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt | 1 + ...tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt | 1 + ...orflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt | 1 + .../tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt | 1 + ...orflow.nn.rnn_cell.-residual-wrapper.pbtxt | 1 + 126 files changed, 154 insertions(+), 38 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 3d6a200276..83187b51b5 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -42,24 +42,6 @@ from tensorflow.python.training import saver as core_saver from tensorflow.python.training import training_util -class CheckpointableDenseLayer(core.Dense, checkpointable.Checkpointable): - - def __init__(self, *args, **kwargs): - checkpointable.Checkpointable.__init__(self) - core.Dense.__init__(self, *args, **kwargs) - - def add_variable(self, name, shape, **kwargs): - # Calls both Checkpointable._add_variable and Layer.add_variable. Eventually - # Layer.add_variable should inherit from Checkpointable and simply call - # super and then do post-processing. 
- return checkpointable.Checkpointable._add_variable_with_custom_getter( - self, - name=name, - shape=shape, - getter=functools.partial(core.Dense.add_variable, self), - **kwargs) - - # pylint: disable=not-callable class CheckpointableNetwork(network_lib.Network, checkpointable.Checkpointable): @@ -122,9 +104,9 @@ class MyNetwork(CheckpointableNetwork): def __init__(self): super(MyNetwork, self).__init__() - self._named_dense = CheckpointableDenseLayer(1, use_bias=True) + self._named_dense = core.Dense(1, use_bias=True) self._via_track_layer = self.track_layer( - CheckpointableDenseLayer(1, use_bias=False), name="via_track_layer") + core.Dense(1, use_bias=False), name="via_track_layer") # We can still track Checkpointables which aren't Layers. self._non_layer = NonLayerCheckpointable() @@ -326,10 +308,10 @@ class CheckpointingTests(test.TestCase): "global_step:0", named_variables["optimizer_step" + suffix].name) self.assertEqual( - "my_network/checkpointable_dense_layer_1/kernel:0", + "my_network/dense_1/kernel:0", named_variables["network/via_track_layer/kernel" + suffix].name) self.assertEqual( - "my_network/checkpointable_dense_layer/kernel:0", + "my_network/dense/kernel:0", named_variables["network/_named_dense/kernel" + suffix].name) self.assertEqual( "beta1_power:0", @@ -348,18 +330,18 @@ class CheckpointingTests(test.TestCase): serialized_graph.nodes[optimizer_node.children[0].node_id] .attributes[0].full_name) self.assertEqual( - "my_network/checkpointable_dense_layer/kernel", + "my_network/dense/kernel", serialized_graph.nodes[optimizer_node.slot_variables[0] .original_variable_node_id] .attributes[0].full_name) # We strip off the :0 suffix, as variable.name-based saving does. 
self.assertEqual( - "my_network/checkpointable_dense_layer/kernel/Adam", + "my_network/dense/kernel/Adam", serialized_graph.nodes[optimizer_node.slot_variables[0] .slot_variable_node_id] .attributes[0].full_name) self.assertEqual( - "my_network/checkpointable_dense_layer/kernel/Adam:0", + "my_network/dense/kernel/Adam:0", optimizer.get_slot( var=named_variables["network/_named_dense/kernel" + suffix], name="m").name) diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 8314c4aa87..2ec9971b88 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -36,12 +36,13 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as tf_variables from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @tf_export('layers.Layer') -class Layer(object): +class Layer(checkpointable.CheckpointableBase): """Base layer class. This is the class from which all layers inherit, implementing common @@ -532,13 +533,17 @@ class Layer(object): with vs.variable_scope( self._scope, reuse=reuse, auxiliary_name_scope=False) as scope: with ops.name_scope(self._name_scope_name(scope)): - variable = vs.get_variable(name, - shape=shape, - initializer=initializer, - dtype=dtypes.as_dtype(dtype), - constraint=constraint, - trainable=trainable and self.trainable, - partitioner=partitioner) + variable = self._add_variable_with_custom_getter( + name=name, + shape=shape, + getter=vs.get_variable, + # Manage errors in Layer rather than Checkpointable. 
+ overwrite=True, + initializer=initializer, + dtype=dtypes.as_dtype(dtype), + constraint=constraint, + trainable=trainable and self.trainable, + partitioner=partitioner) if init_graph is not None: # pylint: disable=protected-access # The variable was created and initialized in a graph. diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py index 11caa761ae..c5e7f3cdac 100644 --- a/tensorflow/python/training/checkpointable.py +++ b/tensorflow/python/training/checkpointable.py @@ -322,7 +322,8 @@ class CheckpointableBase(object): def _add_variable_with_custom_getter( self, name, shape=None, dtype=dtypes.float32, - initializer=None, getter=None, **kwargs_for_getter): + initializer=None, getter=None, overwrite=False, + **kwargs_for_getter): """Restore-on-create for a variable be saved with this `Checkpointable`. If the user has requested that this object or another `Checkpointable` which @@ -334,12 +335,11 @@ class CheckpointableBase(object): name: A name for the variable. Must be unique within this object. shape: The shape of the variable. dtype: The data type of the variable. - initializer: The initializer to use. Ignored if there is a deferred restoration left over from a call to `_restore_from_checkpoint_position`. - getter: The getter to wrap which actually fetches the variable. + overwrite: If True, disables unique name and type checks. **kwargs_for_getter: Passed to the getter. Returns: @@ -349,7 +349,7 @@ class CheckpointableBase(object): ValueError: If the variable name is not unique. """ self._maybe_initialize_checkpointable() - if name in self._dependency_names: + if not overwrite and name in self._dependency_names: raise ValueError( ("A variable named '%s' already exists in this Checkpointable, but " "Checkpointable._add_variable called to create another with " @@ -385,7 +385,13 @@ class CheckpointableBase(object): # assign again. 
It will add this variable to our dependencies, and if there # is a non-trivial restoration queued, it will handle that. This also # handles slot variables. - return self._track_checkpointable(new_variable, name=name) + if not overwrite or isinstance(new_variable, CheckpointableBase): + return self._track_checkpointable(new_variable, name=name, + overwrite=overwrite) + else: + # TODO(allenl): Some variable types are not yet supported. Remove this + # fallback once all get_variable() return types are Checkpointable. + return new_variable def _preload_simple_restoration(self, name, shape): """Return a dependency's value for restore-on-create. diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt index 241db8956a..7be2f4f61f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt index 9673a508d6..0f2428d77a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt index 041acf29ff..db8f626b98 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" 
is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt index 48143b2cd6..809b3a5430 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt index 11f78fed97..68d41bb6cc 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt index 84eb825632..970b777e51 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt index ab377a248f..529c64ab29 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" 
is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt index c2edd79f52..7e7c330d74 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt index f3f37eed99..ada8466d74 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt index 31d1d1c049..2a5c1cd530 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt index 6582e1b18e..9a2cb29815 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" 
is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt index 12f66095d2..f5e991ea42 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt index 3a45fa180e..31732214a6 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt index a0f272c178..422eddf10d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt index 9c7d3154ad..9053a37916 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt @@ -4,6 +4,7 @@ tf_class { 
is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt index 949b225e54..3d536d2182 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt index a736c84a10..6a7da1aef8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt index 95f9afed28..801a033972 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt index 38ba15400a..13352e264a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt @@ -6,6 +6,7 @@ tf_class { 
is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt index bc84e2a97e..f400e4a15c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt index 0802578c22..b3a9f573b8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt index 8ad4646c74..a9be09c0ab 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt index 110e267b75..be1ef5eb92 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" 
is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt index 24cfc83af6..30034f7eaf 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt index c56e89187f..189b38054c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt index 3674f2746c..a76d85c629 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt index 5a8f9d7702..782195d4ad 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt index caa748be81..2cb7a39ea5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt index 97bd4a265a..8080330699 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt index 20c43eeed1..678f40bbc2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt index 256f0e4bdf..fac826109b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt index d1e53f900c..285d544af2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt index b010ff6805..b77976974c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt index fffd3854bb..b07714d3f2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt index 1155fe03fc..e67d4ddfc4 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt @@ -3,6 +3,7 @@ tf_class { 
is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt index 5e4bebb15b..b2a668e5a8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt index cb9bb3d821..1fd3febad2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt index 9a36e80649..f5f41d879d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activation" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt index eb32238e15..f4f1a5d51c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + 
is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt index 37fc8e29ae..e502df5e17 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt index 490816458b..9c8d5bfcd8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt index ab49f67f33..8dd65f1f24 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt index 3d7cb3ba49..5e30571cc7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt index c99ddab4f3..ba90fa4546 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt index 290d2eaebe..8823857758 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt index cf63069641..500ced852b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt index 2dadc67c09..cf2717ed46 
100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt index 1a1a1dcf64..a86ff1a469 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt index 44898e23ad..e01cc7c1b0 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt index 941d867d24..259c1fb37c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt index 9a5a6325f8..0c41bf97f7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt index 7a0c1932f6..bec8817aa3 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt index f679c1d006..17be862229 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt index ad1e7f2cad..6d2a8c5619 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt index 6dad4b4897..490b5b618c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activation" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt index fa45d8c902..21a65b838a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt index 023d6c0d69..127b04738e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.keras.layers.Layer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt index e429fced77..87e49f2ed5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt index 462568124f..1aa3aad324 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt index 11bf6a2b42..5e9dc7d477 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt index a932448891..0d101e5b68 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt index 6ff2adddac..c85cd49ac8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: 
"activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt index 2957673d4d..4f59e330c9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt index 2191c10b73..c0ea0eb050 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt index af750ac1b6..ca37ae5131 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt index 9046061510..3ede237834 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: 
"activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt index a40666807b..d87e25a7ba 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt index 65378cef42..e4df7b48ae 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt index b037559e02..6bf7c77743 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt index b3a7f47fa5..c14be132b7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt index b2f22f7da3..72ffbceae0 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt index 792eacf90d..d3e780c8b2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt index 5b79a021ca..a27980a9d1 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt index 99c64505ee..67f991276c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt index d5873ccf76..fccea5e8af 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt index 76b4c10a46..d20663bdb0 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt index 40cd87de5f..889fa0a1b5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt index c44c0da148..c850f3fedc 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt @@ -6,6 +6,7 @@ tf_class { 
is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt index bd70c31c38..526d88ccba 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt index de717976cf..7fddae3447 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activation" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt index a93b7b8f6e..5b9b62fc97 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt index 4dc24b195e..769da30999 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt @@ -5,6 +5,7 @@ 
tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt index a3bb1cc414..fca2e42a15 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt index f9a78106fa..36e8de09a9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt index 5aa21f4022..a96f16fae9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt index 88e8a46572..e1cbd0e150 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt index f2a7673998..f0d35728fb 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt index 4db82ddfa9..74efaea6dd 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt index 61e65ad56d..dc5bd5fd53 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt index 3d9402db4e..e01ccfb74a 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt index 0223799ed4..7e6f90f762 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt index 2e4429833a..4d0d402dad 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt index 26cf7b9e49..b353a529bc 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt index 64d35d9447..9fe1256e61 
100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt index 18be9c9701..8ccf15f9ab 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt index b934632922..102eb32203 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt index de81206bc8..1c4f550d7f 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt index 72d5496464..d2db095269 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt index 595e77ff9f..34d9a9df28 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt index 0c4aa2ff26..21ad0efecf 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.layers.BatchNormalization" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt index 5f576d0189..ed38747c76 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt index 675a7c76e5..ff453c6059 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt index eaabbf6aab..5583bd22dc 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt index 838e070d79..63f0c32a7c 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt index 4bd8cfc1a4..b77726252c 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt index 57eccb03ff..92db9f6dcd 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt 
@@ -2,6 +2,7 @@ path: "tensorflow.layers.Dense" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt index a1ec00eeea..80fa846a24 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.layers.Dropout" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt index a06943d51a..f63213b3dd 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.layers.Flatten" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt index 24fda0c87e..4e45b2d513 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.layers.Layer" tf_class { is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt index 4c3d00e0e1..19ec33fce7 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" 
member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt index f7e2017b0c..76180c333a 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt index 84780926a3..ded75c8ff0 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt index 05799ecfc9..3dbfa5453f 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt index c2aeb35c46..ab171df1d1 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff 
--git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt index 44536787f0..9c71a24d05 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt index 768565d3ca..9e19f96b74 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt index 0d253e5dd2..7540aa6286 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt index 97edf245f6..fc1ff38669 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member 
{ name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt index 6ecc134d4d..751122cfff 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt index 4b3ca1578b..4b6313f395 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt index 9a6c73a079..00e8c71140 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt index 27488f8e73..3852f90dd6 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.nn.rnn_cell.RNNCell" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member 
{ name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt index 3310836ed2..8f3f0f7506 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" -- GitLab From 7b71b0cfd9f7b4ceb17295cba5b651a04764c37b Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 27 Feb 2018 13:20:03 -0800 Subject: [PATCH 070/311] Checkpointable: Move the checkpoint-grouping utility out of the unit test file Renames Saver -> CheckpointableSaver in preparation for exposing the necessary symbols in tf.contrib.eager. There's a pending change for Optimizers, and Asim is handling Layers/Model. Once those are checked in, we should be able to save/restore everything in the eager examples (or at least the mnist one...). Still plenty more to make Checkpointable, but it should be usable at that point. PiperOrigin-RevId: 187221803 --- .../eager/python/checkpointable_utils.py | 93 ++++++++++++- .../eager/python/checkpointable_utils_test.py | 128 +++++++----------- 2 files changed, 139 insertions(+), 82 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index e57093bdbc..ed431e02ea 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -518,7 +518,7 @@ class _SessionWithFeedDictAdditions(session_lib.SessionInterface): fetches=fetches, feed_dict=feed_dict, **kwargs) -class Saver(object): +class CheckpointableSaver(object): """Saves and restores a `Checkpointable` object and its dependencies. See `Checkpointable` for details of dependency management. 
`Saver` wraps @@ -770,3 +770,94 @@ class Saver(object): load_status = CheckpointLoadStatus( checkpoint, feed_dict=file_prefix_feed_dict) return load_status + + +class Checkpoint(core_checkpointable.Checkpointable): + """A utility class which groups `Checkpointable` objects. + + Accepts arbitrary keyword arguments to its constructor and saves those values + with a checkpoint. Maintains a `save_counter` for numbering checkpoints. + + Example usage: + + ```python + import tensorflow as tf + import tensorflow.contrib.eager as tfe + import os + + checkpoint_directory = "/tmp/training_checkpoints" + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + root = tfe.Checkpoint(optimizer=optimizer, model=model) + root.restore(tf.train.latest_checkpoint(checkpoint_directory)) + for _ in range(num_training_steps): + optimizer.minimize( ... ) + root.save(file_prefix=checkpoint_prefix) + ``` + + For more manual control over saving, use `tfe.CheckpointableSaver` directly. + + Attributes: + save_counter: Incremented when `save()` is called. Used to number + checkpoints. + """ + + def __init__(self, **kwargs): + """Group objects into a training checkpoint. + + Args: + **kwargs: Keyword arguments are set as attributes of this object, and are + saved with the checkpoint. Attribute values must derive from + `CheckpointableBase`. + Raises: + ValueError: If objects in `kwargs` are not Checkpointable. + """ + super(Checkpoint, self).__init__() + for k, v in sorted(kwargs.items(), key=lambda item: item[0]): + if not isinstance(v, core_checkpointable.CheckpointableBase): + raise ValueError( + ("`Checkpoint` was expecting an object derived from " + "`CheckpointableBase`, got %s.") % (v,)) + setattr(self, k, v) + self._save_counter = None # Created lazily for restore-on-create. 
+ self._saver = CheckpointableSaver(weakref.ref(self)) + + def _maybe_create_save_counter(self): + """Create a save counter if it does not yet exist.""" + if self._save_counter is None: + # Initialized to 0 and incremented before saving. + self._save_counter = add_variable( + self, name="save_counter", initializer=0, dtype=dtypes.int64) + + @property + def save_counter(self): + """An integer variable which starts at zero and is incremented on save. + + Used to number checkpoints. + + Returns: + The save counter variable. + """ + self._maybe_create_save_counter() + return self._save_counter + + def save(self, file_prefix, session=None): + """Save a checkpoint. Wraps `tfe.CheckpointableSaver.save`.""" + assign_op = self.save_counter.assign_add(1) + if context.in_graph_mode(): + if session is None: + session = ops.get_default_session() + session.run(assign_op) + return self._saver.save( + file_prefix=file_prefix, + checkpoint_number=self.save_counter, + session=session) + + def restore(self, save_path): + """Restore a checkpoint. Wraps `tfe.CheckpointableSaver.restore`.""" + status = self._saver.restore(save_path=save_path) + # Create the save counter now so it gets initialized with other variables + # when graph building. Creating it earlier would lead to double + # initialization when executing eagerly. 
+ self._maybe_create_save_counter() + return status diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 83187b51b5..68f0d93632 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -18,7 +18,6 @@ from __future__ import print_function import functools import os -import weakref import six @@ -114,51 +113,6 @@ class MyNetwork(CheckpointableNetwork): return self._via_track_layer(self._named_dense(values)) -class Checkpoint(checkpointable.Checkpointable): - """A utility class which groups `Checkpointable` objects.""" - - def __init__(self, **kwargs): - super(Checkpoint, self).__init__() - for k, v in sorted(kwargs.items(), key=lambda item: item[0]): - setattr(self, k, v) - self._save_counter = None # Created lazily for restore-on-create. - self._saver = checkpointable_utils.Saver(weakref.ref(self)) - - @property - def save_counter(self): - """An integer variable which starts at zero and is incremented on save. - - Used to number checkpoints. - - Returns: - The save counter variable. - """ - if self._save_counter is None: - # Initialized to 0 and incremented before saving. - self._save_counter = checkpointable_utils.add_variable( - self, name="save_counter", initializer=0, dtype=dtypes.int64) - return self._save_counter - - def save(self, file_prefix, session=None): - assign_op = self.save_counter.assign_add(1) - if context.in_graph_mode(): - if session is None: - session = ops.get_default_session() - session.run(assign_op) - return self._saver.save( - file_prefix=file_prefix, - checkpoint_number=self.save_counter, - session=session) - - def restore(self, save_path): - status = self._saver.restore(save_path=save_path) - # Create the save counter now so it gets initialized with other variables - # when graph building. 
Creating it earlier would lead to double - # initialization when executing eagerly. - self.save_counter # pylint: disable=pointless-statement - return status - - class InterfaceTests(test.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) @@ -256,7 +210,7 @@ class CheckpointingTests(test.TestCase): other_network = MyNetwork() optimizer = CheckpointableAdam(0.001) optimizer_step = training_util.get_or_create_global_step() - root_checkpointable = Checkpoint( + root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=optimizer_step) if context.in_eager_mode(): optimizer.minimize( @@ -361,7 +315,8 @@ class CheckpointingTests(test.TestCase): def testSaveRestore(self): network = MyNetwork() optimizer = CheckpointableAdam(0.001) - root_checkpointable = Checkpoint(optimizer=optimizer, network=network) + root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, network=network) input_value = constant_op.constant([[3.]]) if context.in_eager_mode(): optimizer.minimize( @@ -392,7 +347,7 @@ class CheckpointingTests(test.TestCase): return # Restore-on-create is only supported when executing eagerly on_create_network = MyNetwork() on_create_optimizer = CheckpointableAdam(0.001) - on_create_root = Checkpoint( + on_create_root = checkpointable_utils.Checkpoint( optimizer=on_create_optimizer, network=on_create_network) # Deferred restoration status = on_create_root.restore(save_path=save_path) @@ -424,7 +379,7 @@ class CheckpointingTests(test.TestCase): for training_continuation in range(3): network = MyNetwork() optimizer = CheckpointableAdam(0.001) - root = Checkpoint( + root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=training_util.get_or_create_global_step()) root.restore(core_saver.latest_checkpoint(checkpoint_directory)) @@ -448,7 +403,7 @@ class CheckpointingTests(test.TestCase): with ops.Graph().as_default(): network = MyNetwork() 
optimizer = CheckpointableAdam(0.001) - root = Checkpoint( + root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, global_step=training_util.get_or_create_global_step()) input_value = constant_op.constant([[3.]]) @@ -485,7 +440,7 @@ class CheckpointingTests(test.TestCase): graph=ops.get_default_graph()): network = MyNetwork() optimizer = CheckpointableAdam(0.001) - root = Checkpoint( + root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, global_step=training_util.get_or_create_global_step()) checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) @@ -567,9 +522,11 @@ class CheckpointingTests(test.TestCase): self.evaluate(state_ops.assign(original.dep.var, 123.)) checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - save_path = checkpointable_utils.Saver(original).save(checkpoint_prefix) + save_path = checkpointable_utils.CheckpointableSaver( + original).save(checkpoint_prefix) load_into = LateDependencies() - status = checkpointable_utils.Saver(load_into).restore(save_path) + status = checkpointable_utils.CheckpointableSaver( + load_into).restore(save_path) with self.assertRaises(AssertionError): status.assert_consumed() load_into.add_dep() @@ -598,11 +555,12 @@ class CheckpointingTests(test.TestCase): self.evaluate(state_ops.assign(dep_after_var.dep.var, -14.)) checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - save_path = checkpointable_utils.Saver(dep_after_var).save( + save_path = checkpointable_utils.CheckpointableSaver(dep_after_var).save( checkpoint_prefix) loaded_dep_after_var = DepAfterVar() - status = checkpointable_utils.Saver(loaded_dep_after_var).restore(save_path) + status = checkpointable_utils.CheckpointableSaver( + loaded_dep_after_var).restore(save_path) loaded_dep_after_var.add_dep() status.assert_consumed() status.run_restore_ops() @@ -622,24 +580,26 @@ class 
CheckpointingTests(test.TestCase): # `root`. Create a one-off grouping so that slot variables for `root.var` # get initialized too. self.evaluate(checkpointable_utils.gather_initializers( - Checkpoint(root=root, optimizer=optimizer))) + checkpointable_utils.Checkpoint(root=root, optimizer=optimizer))) self.evaluate(train_op) else: optimizer.minimize(root.var.read_value) self.evaluate(state_ops.assign(root.var, 12.)) - no_slots_path = checkpointable_utils.Saver(root).save( + no_slots_path = checkpointable_utils.CheckpointableSaver(root).save( os.path.join(checkpoint_directory, "no_slots")) root.optimizer = optimizer self.evaluate(state_ops.assign(root.var, 13.)) self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var), 14.)) - slots_path = checkpointable_utils.Saver(root).save( + slots_path = checkpointable_utils.CheckpointableSaver(root).save( os.path.join(checkpoint_directory, "with_slots")) new_root = checkpointable.Checkpointable() # Load the slot-containing checkpoint (deferred), then immediately overwrite # the non-slot variable (also deferred). - slot_status = checkpointable_utils.Saver(new_root).restore(slots_path) - no_slot_status = checkpointable_utils.Saver(new_root).restore(no_slots_path) + slot_status = checkpointable_utils.CheckpointableSaver( + new_root).restore(slots_path) + no_slot_status = checkpointable_utils.CheckpointableSaver( + new_root).restore(no_slots_path) with self.assertRaises(AssertionError): no_slot_status.assert_consumed() new_root.var = checkpointable_utils.add_variable( @@ -679,15 +639,17 @@ class CheckpointingTests(test.TestCase): save_root.dep.var = checkpointable_utils.add_variable( save_root.dep, name="var", initializer=0.) 
self.evaluate(state_ops.assign(save_root.dep.var, 12.)) - saver = checkpointable_utils.Saver(save_root) + saver = checkpointable_utils.CheckpointableSaver(save_root) first_path = saver.save(os.path.join(checkpoint_directory, "first")) self.evaluate(state_ops.assign(save_root.dep.var, 13.)) second_path = saver.save(os.path.join(checkpoint_directory, "second")) first_root = checkpointable.Checkpointable() second_root = checkpointable.Checkpointable() - first_status = checkpointable_utils.Saver(first_root).restore(first_path) - second_status = checkpointable_utils.Saver(second_root).restore(second_path) + first_status = checkpointable_utils.CheckpointableSaver( + first_root).restore(first_path) + second_status = checkpointable_utils.CheckpointableSaver( + second_root).restore(second_path) load_dep = checkpointable.Checkpointable() load_dep.var = checkpointable_utils.add_variable( load_dep, name="var", shape=[]) @@ -704,8 +666,10 @@ class CheckpointingTests(test.TestCase): # determines the final value. first_root = checkpointable.Checkpointable() second_root = checkpointable.Checkpointable() - second_status = checkpointable_utils.Saver(second_root).restore(second_path) - first_status = checkpointable_utils.Saver(first_root).restore(first_path) + second_status = checkpointable_utils.CheckpointableSaver( + second_root).restore(second_path) + first_status = checkpointable_utils.CheckpointableSaver( + first_root).restore(first_path) load_dep = checkpointable.Checkpointable() load_dep.var = checkpointable_utils.add_variable( load_dep, name="var", shape=[]) @@ -730,10 +694,10 @@ class CheckpointingTests(test.TestCase): save_root.dep_two.dep_three = dep_three checkpointable_utils.add_variable(dep_three, name="var", initializer=0.) 
self.evaluate(checkpointable_utils.gather_initializers(save_root)) - save_path = checkpointable_utils.Saver(save_root).save( + save_path = checkpointable_utils.CheckpointableSaver(save_root).save( os.path.join(checkpoint_directory, "ckpt")) load_root = checkpointable.Checkpointable() - checkpointable_utils.Saver(load_root).restore(save_path) + checkpointable_utils.CheckpointableSaver(load_root).restore(save_path) load_root.dep_one = checkpointable.Checkpointable() load_root.dep_two = checkpointable.Checkpointable() load_root.dep_one.dep_three = checkpointable.Checkpointable() @@ -753,7 +717,7 @@ class CheckpointingTests(test.TestCase): checkpointable_utils.add_variable( save_root.dep_two, name="var2", initializer=64., dtype=dtypes.float64) self.evaluate(checkpointable_utils.gather_initializers(save_root)) - save_path = checkpointable_utils.Saver(save_root).save( + save_path = checkpointable_utils.CheckpointableSaver(save_root).save( os.path.join(checkpoint_directory, "ckpt")) load_root = checkpointable.Checkpointable() load_root.dep_one = checkpointable.Checkpointable() @@ -762,7 +726,7 @@ class CheckpointingTests(test.TestCase): load_root.dep_one, name="var1", shape=[], dtype=dtypes.float64) v2 = checkpointable_utils.add_variable( load_root.dep_one, name="var2", shape=[], dtype=dtypes.float64) - status = checkpointable_utils.Saver(load_root).restore( + status = checkpointable_utils.CheckpointableSaver(load_root).restore( save_path).assert_consumed() status.run_restore_ops() self.assertEqual(32., self.evaluate(v1)) @@ -782,12 +746,13 @@ class CheckpointingTests(test.TestCase): second, "v2", initializer=[1., 1., 2., 3.]) self.evaluate(checkpointable_utils.gather_initializers(first)) checkpoint_directory = self.get_temp_dir() - save_path = checkpointable_utils.Saver(first).save( + save_path = checkpointable_utils.CheckpointableSaver(first).save( os.path.join(checkpoint_directory, "ckpt")) # Test deferred loading first_load = checkpointable.Checkpointable() - status = 
checkpointable_utils.Saver(first_load).restore(save_path) + status = checkpointable_utils.CheckpointableSaver( + first_load).restore(save_path) second_load = checkpointable.Checkpointable() first_load.second = second_load second_load.first = first_load @@ -807,7 +772,7 @@ class CheckpointingTests(test.TestCase): self.assertAllEqual([2., 7., 1.], self.evaluate(first_load.v)) self.evaluate(second_load.v.assign([2., 7., 1., 8.])) self.assertAllEqual([2., 7., 1., 8.], self.evaluate(second_load.v)) - status = checkpointable_utils.Saver(first_load).restore( + status = checkpointable_utils.CheckpointableSaver(first_load).restore( save_path).assert_consumed() status.run_restore_ops() self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v)) @@ -826,14 +791,15 @@ class CheckpointingTests(test.TestCase): name="blah", initializer=0.) self.evaluate(first.var1.assign(4.)) self.evaluate(first.var2.assign(8.)) - save_path = checkpointable_utils.Saver(first).save( + save_path = checkpointable_utils.CheckpointableSaver(first).save( checkpoint_prefix) restore_graph = ops.Graph() with restore_graph.as_default(), self.test_session(restore_graph): second = checkpointable.Checkpointable() second.var2 = variable_scope.get_variable( name="blah", initializer=0.) - status = checkpointable_utils.Saver(second).restore(save_path) + status = checkpointable_utils.CheckpointableSaver( + second).restore(save_path) recreated_var1 = variable_scope.get_variable( name="outside_var", initializer=0.) 
status.run_restore_ops() @@ -856,7 +822,7 @@ class CheckpointingTests(test.TestCase): obj.opt = CheckpointableAdam(0.1) obj.opt.minimize(obj.var.read_value()) self.evaluate(checkpointable_utils.gather_initializers(obj)) - saver = checkpointable_utils.Saver(obj) + saver = checkpointable_utils.CheckpointableSaver(obj) saver.save(checkpoint_prefix) before_ops = graph.get_operations() saver.save(checkpoint_prefix) @@ -874,7 +840,7 @@ class CheckpointingTests(test.TestCase): obj.opt = CheckpointableAdam(0.1) obj.opt.minimize(obj.var.read_value()) self.evaluate(checkpointable_utils.gather_initializers(obj)) - saver = checkpointable_utils.Saver(obj) + saver = checkpointable_utils.CheckpointableSaver(obj) save_path = saver.save(checkpoint_prefix) saver.restore(save_path) before_ops = graph.get_operations() @@ -889,7 +855,7 @@ class CheckpointCompatibilityTests(test.TestCase): network = MyNetwork() optimizer = CheckpointableAdam(0.001) optimizer_step = training_util.get_or_create_global_step() - root_checkpointable = Checkpoint( + root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=optimizer_step) train_op = optimizer.minimize( functools.partial(network, input_value), @@ -945,7 +911,7 @@ class CheckpointCompatibilityTests(test.TestCase): self._set_sentinels(root) with self.assertRaises(AssertionError): self._check_sentinels(root) - object_saver = checkpointable_utils.Saver(root) + object_saver = checkpointable_utils.CheckpointableSaver(root) status = object_saver.restore(save_path) with self.assertRaises(AssertionError): status.assert_consumed() @@ -966,7 +932,7 @@ class CheckpointCompatibilityTests(test.TestCase): with save_graph.as_default(), self.test_session( graph=save_graph) as session: root = self._initialized_model() - object_saver = checkpointable_utils.Saver(root) + object_saver = checkpointable_utils.CheckpointableSaver(root) save_path = object_saver.save( session=session, file_prefix=checkpoint_prefix) with 
context.eager_mode(): @@ -980,7 +946,7 @@ class CheckpointCompatibilityTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") with context.eager_mode(): root = self._initialized_model() - object_saver = checkpointable_utils.Saver(root) + object_saver = checkpointable_utils.CheckpointableSaver(root) save_path = object_saver.save(file_prefix=checkpoint_prefix) with context.graph_mode(): save_graph = ops.Graph() -- GitLab From 142c1f0b9333a6e69fefad18b951944fa4617cd9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 13:22:58 -0800 Subject: [PATCH 071/311] During late import, update model->flags from the input-arrays shape information that was read from the graph (e.g. shape attribute in Placeholder nodes). PiperOrigin-RevId: 187222358 --- tensorflow/contrib/lite/toco/tooling_util.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 1ab7b34331..d23b3737fc 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1199,7 +1199,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { << "This model does not define output arrays, so a " "--output_arrays flag must be given on the command-line."; - for (const auto& input_array_proto : model->flags.input_arrays()) { + for (auto& input_array_proto : *model->flags.mutable_input_arrays()) { auto& input_array = model->GetOrCreateArray(input_array_proto.name()); if (input_array_proto.has_data_type()) { const ArrayDataType specified_type = @@ -1243,6 +1243,11 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { for (int i = 0; i < input_array_dims.size(); i++) { CHECK_EQ(input_array_dims[i], input_array_proto.shape().dims(i)); } + } else { + for (int i = 0; i < input_array.shape().dimensions_count(); i++) { + input_array_proto.mutable_shape()->add_dims( + 
input_array.shape().dims(i)); + } } } -- GitLab From 93f5dd54dab124a9ec3b4c5dcb42d31716fe2f95 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 13:36:10 -0800 Subject: [PATCH 072/311] Optimized non-aligned case of split and split_v on the first input dimension. PiperOrigin-RevId: 187224344 --- tensorflow/core/kernels/batch_kernels.cc | 14 +- tensorflow/core/kernels/split_lib.h | 32 ++-- tensorflow/core/kernels/split_lib_cpu.cc | 32 ++-- tensorflow/core/kernels/split_lib_gpu.cu.cc | 16 +- tensorflow/core/kernels/split_op.cc | 154 +++++++++++++------- tensorflow/core/kernels/split_v_op.cc | 149 ++++++++++++------- tensorflow/core/kernels/tensor_array_ops.cc | 12 +- tensorflow/core/kernels/unpack_op.cc | 14 +- 8 files changed, 258 insertions(+), 165 deletions(-) diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index 546e51be53..8c99ded0a8 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -146,7 +146,7 @@ Status SplitCPU(OpKernelContext* context, const Tensor& input, suffix_dim_size *= input.shape().dim_size(i); } auto input_reshaped = - input.shaped({1, input.shape().dim_size(0), suffix_dim_size}); + input.shaped({input.shape().dim_size(0), suffix_dim_size}); int64 position = 0; for (const int64 size : sizes) { @@ -155,13 +155,13 @@ Status SplitCPU(OpKernelContext* context, const Tensor& input, Tensor output; TF_RETURN_IF_ERROR( context->allocate_temp(input.dtype(), output_shape, &output)); - auto output_shaped = output.shaped({1, size, suffix_dim_size}); + auto output_shaped = output.shaped({size, suffix_dim_size}); - Eigen::DSizes slice_indices{0, position, 0}; - Eigen::DSizes slice_sizes{1, size, suffix_dim_size}; - functor::Split()(context->eigen_device(), - output_shaped, input_reshaped, slice_indices, - slice_sizes); + Eigen::DSizes slice_indices{position, 0}; + Eigen::DSizes slice_sizes{size, suffix_dim_size}; + 
functor::Split()(context->eigen_device(), + output_shaped, input_reshaped, + slice_indices, slice_sizes); outputs->emplace_back(output); diff --git a/tensorflow/core/kernels/split_lib.h b/tensorflow/core/kernels/split_lib.h index a08949e626..bc1fa28f8f 100644 --- a/tensorflow/core/kernels/split_lib.h +++ b/tensorflow/core/kernels/split_lib.h @@ -31,31 +31,31 @@ struct SplitCustom { const Eigen::DSizes& slice_sizes); }; -template +template struct Split { - void operator()(const Device& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes); + void operator()(const Device& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes); }; -template -struct Split { +template +struct Split { void operator()(const Eigen::ThreadPoolDevice& d, - typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes); + typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes); }; #ifdef TENSORFLOW_USE_SYCL -template +template struct Split { void operator()(const Eigen::SyclDevice& d, - typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes); + typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes); }; #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/split_lib_cpu.cc b/tensorflow/core/kernels/split_lib_cpu.cc index 771c633b15..a3060e4e90 100644 --- a/tensorflow/core/kernels/split_lib_cpu.cc +++ b/tensorflow/core/kernels/split_lib_cpu.cc @@ -24,12 +24,12 @@ limitations under the License. 
namespace tensorflow { namespace functor { -template -void Split::operator()( - const Eigen::ThreadPoolDevice& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes) { +template +void Split::operator()( + const Eigen::ThreadPoolDevice& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes) { if (output.size() < 131072) { output = input.slice(slice_indices, slice_sizes); } else { @@ -37,22 +37,26 @@ void Split::operator()( } } -#define DEFINE_CPU_KERNELS(T) template struct Split; +#define DEFINE_CPU_KERNELS(T) \ + template struct Split; \ + template struct Split; TF_CALL_ALL_TYPES(DEFINE_CPU_KERNELS) DEFINE_CPU_KERNELS(quint8) #ifdef TENSORFLOW_USE_SYCL -template -void Split::operator()( - const Eigen::SyclDevice& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes) { +template +void Split::operator()( + const Eigen::SyclDevice& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes) { output.device(d) = input.slice(slice_indices, slice_sizes); } -#define DEFINE_SYCL_KERNELS(T) template struct Split; +#define DEFINE_SYCL_KERNELS(T) \ + template struct Split; \ + template struct Split; TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DEFINE_SYCL_KERNELS); #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc index 9f234fc093..393818730b 100644 --- a/tensorflow/core/kernels/split_lib_gpu.cu.cc +++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc @@ -29,12 +29,12 @@ limitations under the License. 
namespace tensorflow { namespace functor { -template -void Split::operator()( - const Device& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes) { +template +void Split::operator()( + const Device& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes) { To32Bit(output).device(d) = To32Bit(input).slice(slice_indices, slice_sizes); } @@ -47,7 +47,9 @@ void SplitCustom::operator()( To32Bit(output).device(d) = To32Bit(input).slice(slice_indices, slice_sizes); } -#define DEFINE_GPU_KERNELS(T) template struct Split; +#define DEFINE_GPU_KERNELS(T) \ + template struct Split; \ + template struct Split; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); TF_CALL_complex64(DEFINE_GPU_KERNELS); diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc index 85f529326d..1bc92a4f70 100644 --- a/tensorflow/core/kernels/split_op.cc +++ b/tensorflow/core/kernels/split_op.cc @@ -121,6 +121,77 @@ class SplitOpBase : public OpKernel { } }; +template +class SplitOpCPUImpl { + public: + template + void operator()(OpKernelContext* context, + const InputReshapedType& input_reshaped, + const TensorShape& input_shape, int32 split_dim, + Eigen::DenseIndex prefix_dim_size, + Eigen::DenseIndex split_dim_size, + Eigen::DenseIndex suffix_dim_size, + const MakeSizesType& make_sizes, + const ReshapeResultType& reshape_result, int32 num_split, + int64 split_dim_output_size) const { + const auto num_threads = + context->device()->tensorflow_cpu_worker_threads()->num_threads; + // TODO(jewillco): Tune heuristic further. 
+ const auto input_element_count = input_shape.num_elements(); + const bool use_parallelism_between_outputs = + (num_split >= 4 && + input_element_count >= std::max(num_threads, num_split) * 4096 && + input_element_count < num_split * 180 * 1024); + Eigen::DSizes indices; + for (int i = 0; i < NDims; ++i) { + indices[i] = 0; + } + auto sizes = make_sizes(split_dim_output_size); + TensorShape output_shape(input_shape); + output_shape.set_dim(split_dim, split_dim_output_size); + + auto range_output_func = [&indices, context, &output_shape, prefix_dim_size, + split_dim_output_size, suffix_dim_size, &sizes, + use_parallelism_between_outputs, &input_reshaped, + &reshape_result](int64 start, int64 limit) { + for (int64 i = start; i < limit; ++i) { + Tensor* result = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(i, output_shape, &result)); + if (prefix_dim_size * split_dim_output_size * suffix_dim_size > 0) { + Eigen::DSizes slice_indices; + Eigen::DSizes slice_sizes; + for (int j = 0; j < NDims; ++j) { + slice_indices[j] = + (j == NDims - 2 ? i * split_dim_output_size : indices[j]); + slice_sizes[j] = sizes[j]; + } + + auto result_shaped = reshape_result(result, split_dim_output_size); + + if (use_parallelism_between_outputs) { + // Use sequential implementation for single output. + result_shaped = input_reshaped.slice(slice_indices, slice_sizes); + } else { + // This implementation may be parallel internally. + functor::Split()( + context->eigen_device(), result_shaped, + input_reshaped, slice_indices, slice_sizes); + } + } + } + }; + if (use_parallelism_between_outputs) { + // Run in parallel, disabling parallelism in functor. + context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor( + num_split, input_element_count / num_split, range_output_func); + } else { + // Run sequentially, but allow internal parallelism in functor. 
+ range_output_func(0, num_split); + } + } +}; + template class SplitOpCPU : public SplitOpBase { public: @@ -154,66 +225,37 @@ class SplitOpCPU : public SplitOpBase { std::tie(prefix_dim_size, split_dim_size, suffix_dim_size) = Base::template SetDims(input_shape, split_dim); - auto input_reshaped = - input.shaped({prefix_dim_size, split_dim_size, suffix_dim_size}); const int64 split_dim_output_size = split_dim_size / num_split; - TensorShape output_shape(input_shape); - output_shape.set_dim(split_dim, split_dim_output_size); - - Eigen::DSizes indices{0, 0, 0}; - const Eigen::DSizes sizes{ - prefix_dim_size, split_dim_output_size, suffix_dim_size}; - - const auto num_threads = - context->device()->tensorflow_cpu_worker_threads()->num_threads; - // TODO(jewillco): Tune heuristic further. - const auto input_element_count = input_shape.num_elements(); - const bool use_parallelism_between_outputs = - (num_split >= 4 && - input_element_count >= std::max(num_threads, num_split) * 4096 && - input_element_count < num_split * 180 * 1024); - - auto range_output_func = [&indices, context, &output_shape, prefix_dim_size, - split_dim_output_size, suffix_dim_size, &sizes, - use_parallelism_between_outputs, - &input_reshaped](int64 start, int64 limit) { - for (int64 i = start; i < limit; ++i) { - Tensor* result = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(i, output_shape, &result)); - if (prefix_dim_size * split_dim_output_size * suffix_dim_size > 0) { - Eigen::DSizes slice_indices; - Eigen::DSizes slice_sizes; - for (int j = 0; j < 3; ++j) { - slice_indices[j] = - (j == 1 ? i * split_dim_output_size : indices[j]); - slice_sizes[j] = sizes[j]; - } - - auto result_shaped = result->shaped( - {prefix_dim_size, split_dim_output_size, suffix_dim_size}); - if (use_parallelism_between_outputs) { - // Use sequential implementation for single output. 
- result_shaped = input_reshaped.slice(slice_indices, slice_sizes); - } else { - // This implementation may be parallel internally. - functor::Split()(context->eigen_device(), - result_shaped, input_reshaped, - slice_indices, slice_sizes); - } - } - } - }; - if (use_parallelism_between_outputs) { - // Run in parallel, disabling parallelism in functor. - Shard(num_split, - context->device()->tensorflow_cpu_worker_threads()->workers, - num_split, input_element_count / num_split, range_output_func); + if (prefix_dim_size == 1) { + auto input_reshaped = + input.shaped({split_dim_size, suffix_dim_size}); + auto make_sizes = [&](int64 split_size) { + return Eigen::DSizes{split_size, suffix_dim_size}; + }; + auto reshape_result = [&](Tensor* result, int64 split_size) { + return result->shaped({split_size, suffix_dim_size}); + }; + SplitOpCPUImpl{}( + context, input_reshaped, input_shape, split_dim, prefix_dim_size, + split_dim_size, suffix_dim_size, make_sizes, reshape_result, + num_split, split_dim_output_size); } else { - // Run sequentially, but allow internal parallelism in functor. 
- range_output_func(0, num_split); + auto input_reshaped = input.shaped( + {prefix_dim_size, split_dim_size, suffix_dim_size}); + auto make_sizes = [&](int64 split_size) { + return Eigen::DSizes{prefix_dim_size, split_size, + suffix_dim_size}; + }; + auto reshape_result = [&](Tensor* result, int64 split_size) { + return result->shaped( + {prefix_dim_size, split_size, suffix_dim_size}); + }; + SplitOpCPUImpl{}( + context, input_reshaped, input_shape, split_dim, prefix_dim_size, + split_dim_size, suffix_dim_size, make_sizes, reshape_result, + num_split, split_dim_output_size); } } }; diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 7ff5df47d7..16fa890780 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -175,6 +175,76 @@ class SplitVOpBase : public OpKernel { } }; +template +class SplitVOpCPUImpl { + public: + template + void operator()(OpKernelContext* context, + const InputReshapedType& input_reshaped, + const std::vector& split_start_points, + const TensorShape& input_shape, int32 split_dim, + Eigen::DenseIndex prefix_dim_size, + Eigen::DenseIndex split_dim_size, + Eigen::DenseIndex suffix_dim_size, + std::vector& split_sizes_vec, + const MakeSizesType& make_sizes, + const ReshapeResultType& reshape_result) const { + Eigen::DSizes indices; + for (int i = 0; i < NDims; ++i) { + indices[i] = 0; + } + const auto num_threads = + context->device()->tensorflow_cpu_worker_threads()->num_threads; + // TODO(jewillco): Tune heuristic further. 
+ const auto input_element_count = input_shape.num_elements(); + const int num_split = split_start_points.size(); + const bool use_parallelism_between_outputs = + (num_split >= 4 && + input_element_count >= std::max(num_threads, num_split) * 4096 && + input_element_count < num_split * 180 * 1024); + + auto range_output_func = [&indices, context, &input_shape, prefix_dim_size, + split_dim, &split_sizes_vec, &split_start_points, + suffix_dim_size, use_parallelism_between_outputs, + &input_reshaped, &make_sizes, + &reshape_result](int64 start, int64 limit) { + for (int64 i = start; i < limit; ++i) { + TensorShape output_shape(input_shape); + output_shape.set_dim(split_dim, split_sizes_vec[i]); + Tensor* result = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(i, output_shape, &result)); + + const auto sizes = make_sizes(split_sizes_vec[i]); + + if (sizes.TotalSize() > 0) { + auto result_shaped = reshape_result(result, split_sizes_vec[i]); + + auto current_indices = indices; + current_indices[NDims - 2] = split_start_points[i]; + if (use_parallelism_between_outputs) { + // Use sequential implementation for single output. + result_shaped = input_reshaped.slice(current_indices, sizes); + } else { + // This implementation may be parallel internally. + functor::Split()( + context->eigen_device(), result_shaped, + input_reshaped, current_indices, sizes); + } + } + } + }; + if (use_parallelism_between_outputs) { + // Run in parallel, disabling parallelism in functor. + context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor( + num_split, input_element_count / num_split, range_output_func); + } else { + // Run sequentially, but allow internal parallelism in functor. 
+ range_output_func(0, num_split); + } + } +}; + template class SplitVOpCPU : public SplitVOpBase { public: @@ -209,10 +279,6 @@ class SplitVOpCPU : public SplitVOpBase { std::tie(prefix_dim_size, split_dim_size, suffix_dim_size) = Base::template SetDims(input_shape, split_dim); - auto input_reshaped = - input.shaped({prefix_dim_size, split_dim_size, suffix_dim_size}); - - Eigen::DSizes indices{0, 0, 0}; std::vector split_start_points(num_split); for (int i = 0; i < num_split; ++i) { if (i == 0) { @@ -223,55 +289,34 @@ class SplitVOpCPU : public SplitVOpBase { } } - const auto num_threads = - context->device()->tensorflow_cpu_worker_threads()->num_threads; - // TODO(jewillco): Tune heuristic further. - const auto input_element_count = input_shape.num_elements(); - const bool use_parallelism_between_outputs = - (num_split >= 4 && - input_element_count >= std::max(num_threads, num_split) * 4096 && - input_element_count < num_split * 180 * 1024); - - auto range_output_func = [&indices, context, &input_shape, prefix_dim_size, - split_dim, &split_sizes_vec, &split_start_points, - suffix_dim_size, use_parallelism_between_outputs, - &input_reshaped](int64 start, int64 limit) { - for (int64 i = start; i < limit; ++i) { - TensorShape output_shape(input_shape); - output_shape.set_dim(split_dim, split_sizes_vec[i]); - Tensor* result = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(i, output_shape, &result)); - - Eigen::DSizes sizes{ - prefix_dim_size, split_sizes_vec[i], suffix_dim_size}; - - if (sizes.TotalSize() > 0) { - auto result_shaped = result->shaped( - {prefix_dim_size, split_sizes_vec[i], suffix_dim_size}); - - auto current_indices = indices; - current_indices[1] = split_start_points[i]; - if (use_parallelism_between_outputs) { - // Use sequential implementation for single output. - result_shaped = input_reshaped.slice(current_indices, sizes); - } else { - // This implementation may be parallel internally. 
- functor::Split()(context->eigen_device(), - result_shaped, input_reshaped, - current_indices, sizes); - } - } - } - }; - if (use_parallelism_between_outputs) { - // Run in parallel, disabling parallelism in functor. - Shard(num_split, - context->device()->tensorflow_cpu_worker_threads()->workers, - num_split, input_element_count / num_split, range_output_func); + if (prefix_dim_size == 1) { + auto input_reshaped = + input.shaped({split_dim_size, suffix_dim_size}); + auto make_sizes = [&](Tlen split_size) { + return Eigen::DSizes{split_size, suffix_dim_size}; + }; + auto reshape_result = [&](Tensor* result, Tlen split_size) { + return result->shaped({split_size, suffix_dim_size}); + }; + SplitVOpCPUImpl{}( + context, input_reshaped, split_start_points, input_shape, split_dim, + prefix_dim_size, split_dim_size, suffix_dim_size, split_sizes_vec, + make_sizes, reshape_result); } else { - // Run sequentially, but allow internal parallelism in functor. - range_output_func(0, num_split); + auto input_reshaped = input.shaped( + {prefix_dim_size, split_dim_size, suffix_dim_size}); + auto make_sizes = [&](Tlen split_size) { + return Eigen::DSizes{prefix_dim_size, split_size, + suffix_dim_size}; + }; + auto reshape_result = [&](Tensor* result, Tlen split_size) { + return result->shaped( + {prefix_dim_size, split_size, suffix_dim_size}); + }; + SplitVOpCPUImpl{}( + context, input_reshaped, split_start_points, input_shape, split_dim, + prefix_dim_size, split_dim_size, suffix_dim_size, split_sizes_vec, + make_sizes, reshape_result); } } }; diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc index af93d814ec..7ec26d95e6 100644 --- a/tensorflow/core/kernels/tensor_array_ops.cc +++ b/tensorflow/core/kernels/tensor_array_ops.cc @@ -1104,9 +1104,9 @@ class TensorArrayUnpackOrScatterOp : public OpKernel { indices[1] = i; if (element_shape.num_elements() > 0) { - functor::Split()(ctx->eigen_device(), - tensor_value_i_t, 
tensor_value_t, indices, - sizes); + functor::Split()(ctx->eigen_device(), + tensor_value_i_t, tensor_value_t, + indices, sizes); } write_values.push_back(persistent_tensor); @@ -1295,9 +1295,9 @@ class TensorArraySplitOp : public OpKernel { auto tensor_value_i_t = tensor_value_i->shaped( {1, tensor_lengths_t(i), elements_per_row}); - functor::Split()(ctx->eigen_device(), - tensor_value_i_t, tensor_value_t, indices, - sizes); + functor::Split()(ctx->eigen_device(), + tensor_value_i_t, tensor_value_t, + indices, sizes); } write_values.push_back(persistent_tensor); diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc index 764b6a252a..4376df34be 100644 --- a/tensorflow/core/kernels/unpack_op.cc +++ b/tensorflow/core/kernels/unpack_op.cc @@ -104,7 +104,7 @@ class UnpackOp : public OpKernel { // Except for shape, unpack is a special case of split, so we reuse the // same computational kernels. auto input_reshaped = - input.shaped({1, before_dim, axis_dim * after_dim}); + input.shaped({before_dim, axis_dim * after_dim}); for (int i = 0; i < num; ++i) { Tensor* output; @@ -112,12 +112,12 @@ class UnpackOp : public OpKernel { context->allocate_output(i, output_shape, &output)); if (output_shape.num_elements() > 0) { - auto output_shaped = output->shaped({1, before_dim, after_dim}); - Eigen::DSizes indices{0, 0, i * after_dim}; - Eigen::DSizes sizes{1, before_dim, after_dim}; - functor::Split()(context->eigen_device(), - output_shaped, input_reshaped, indices, - sizes); + auto output_shaped = output->shaped({before_dim, after_dim}); + Eigen::DSizes indices{0, i * after_dim}; + Eigen::DSizes sizes{before_dim, after_dim}; + functor::Split()(context->eigen_device(), + output_shaped, input_reshaped, indices, + sizes); } } } -- GitLab From 180c457563271b072b33c90bf2f2fbbea450c943 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 27 Feb 2018 13:38:24 -0800 Subject: [PATCH 073/311] Allow the Ftrl-proximal optimizer parameter 'initial_accumulator_value' to take zero values. PiperOrigin-RevId: 187224701 --- tensorflow/python/training/ftrl.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/training/ftrl.py b/tensorflow/python/training/ftrl.py index 9d02e694db..4fa081fab7 100644 --- a/tensorflow/python/training/ftrl.py +++ b/tensorflow/python/training/ftrl.py @@ -53,7 +53,7 @@ class FtrlOptimizer(optimizer.Optimizer): learning_rate: A float value or a constant float `Tensor`. learning_rate_power: A float value, must be less or equal to zero. initial_accumulator_value: The starting value for accumulators. - Only positive values are allowed. + Only zero or positive values are allowed. l1_regularization_strength: A float value, must be greater than or equal to zero. l2_regularization_strength: A float value, must be greater than or @@ -84,9 +84,10 @@ class FtrlOptimizer(optimizer.Optimizer): """ super(FtrlOptimizer, self).__init__(use_locking, name) - if initial_accumulator_value <= 0.0: - raise ValueError("initial_accumulator_value %f needs to be positive" % - initial_accumulator_value) + if initial_accumulator_value < 0.0: + raise ValueError( + "initial_accumulator_value %f needs to be be positive or zero" % + initial_accumulator_value) if learning_rate_power > 0.0: raise ValueError("learning_rate_power %f needs to be negative or zero" % learning_rate_power) -- GitLab From 1034bb2e69cae7ddd7f26f818e0d8527c5d4c3e9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 13:49:03 -0800 Subject: [PATCH 074/311] Renames sequential_feature_column to sequence_feature_column and adds pydoc. 
PiperOrigin-RevId: 187226365 --- tensorflow/contrib/feature_column/BUILD | 12 +- tensorflow/contrib/feature_column/__init__.py | 2 +- ...e_column.py => sequence_feature_column.py} | 121 +++++++++++++++++- ...est.py => sequence_feature_column_test.py} | 2 +- 4 files changed, 123 insertions(+), 14 deletions(-) rename tensorflow/contrib/feature_column/python/feature_column/{sequential_feature_column.py => sequence_feature_column.py} (72%) rename tensorflow/contrib/feature_column/python/feature_column/{sequential_feature_column_test.py => sequence_feature_column_test.py} (99%) diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index a53e36c2d5..8ba0823a71 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -25,13 +25,13 @@ py_library( srcs = ["__init__.py"], srcs_version = "PY2AND3", deps = [ - ":sequential_feature_column", + ":sequence_feature_column", ], ) py_library( - name = "sequential_feature_column", - srcs = ["python/feature_column/sequential_feature_column.py"], + name = "sequence_feature_column", + srcs = ["python/feature_column/sequence_feature_column.py"], srcs_version = "PY2AND3", deps = [ "//tensorflow/python:array_ops", @@ -48,12 +48,12 @@ py_library( ) py_test( - name = "sequential_feature_column_test", - srcs = ["python/feature_column/sequential_feature_column_test.py"], + name = "sequence_feature_column_test", + srcs = ["python/feature_column/sequence_feature_column_test.py"], srcs_version = "PY2AND3", tags = ["no_pip"], deps = [ - ":sequential_feature_column", + ":sequence_feature_column", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", diff --git a/tensorflow/contrib/feature_column/__init__.py b/tensorflow/contrib/feature_column/__init__.py index 6da7b12693..650a80144f 100644 --- a/tensorflow/contrib/feature_column/__init__.py +++ b/tensorflow/contrib/feature_column/__init__.py @@ -19,7 +19,7 @@ from 
__future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.feature_column.python.feature_column.sequential_feature_column import * +from tensorflow.contrib.feature_column.python.feature_column.sequence_feature_column import * from tensorflow.python.util.all_util import remove_undocumented # pylint: enable=unused-import,line-too-long,wildcard-import diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py similarity index 72% rename from tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py rename to tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index 4ed7268e7a..e99033bbec 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -34,8 +34,7 @@ from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope -# TODO(b/73160931): Fix pydoc. -# pylint: disable=g-doc-args,missing-docstring,protected-access +# pylint: disable=protected-access # TODO(b/73827486): Support SequenceExample. @@ -43,8 +42,7 @@ def sequence_input_layer( features, feature_columns, weight_collections=None, - trainable=True, - scope=None): + trainable=True): """"Builds input layer for sequence input. All `feature_columns` must be sequence dense columns with the same @@ -76,6 +74,17 @@ def sequence_input_layer( rnn_cell, inputs=input_layer, sequence_length=sequence_length) ``` + Args: + features: A dict mapping keys to tensors. + feature_columns: An iterable of dense sequence columns. Valid columns are + - `embedding_column` that wraps a `sequence_categorical_column_with_*` + - `sequence_numeric_column`. 
+ weight_collections: A list of collection names to which the Variable will be + added. Note that variables will also be added to collections + `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`. + trainable: If `True` also add the variable to the graph collection + `GraphKeys.TRAINABLE_VARIABLES`. + Returns: An `(input_layer, sequence_length)` tuple where: - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. @@ -84,6 +93,7 @@ def sequence_input_layer( `feature_columns`. - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence length for each example. + Raises: ValueError: If any of the `feature_columns` is the wrong type. """ @@ -95,7 +105,7 @@ def sequence_input_layer( 'Given (type {}): {}'.format(type(c), c)) with variable_scope.variable_scope( - scope, default_name='sequence_input_layer', values=features.values()): + None, default_name='sequence_input_layer', values=features.values()): builder = fc._LazyBuilder(features) output_tensors = [] sequence_lengths = [] @@ -124,6 +134,35 @@ def sequence_input_layer( # TODO(b/73160931): Add remaining categorical columns. def sequence_categorical_column_with_identity( key, num_buckets, default_value=None): + """Returns a feature column that represents sequences of integers. + + Example: + + ```python + watches = sequence_categorical_column_with_identity( + 'watches', num_buckets=1000) + watches_embedding = embedding_column(watches, dimension=10) + columns = [watches] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Args: + key: A unique string identifying the input feature. + num_buckets: Range of inputs. Namely, inputs are expected to be in the + range `[0, num_buckets)`. 
+ default_value: If `None`, this column's graph operations will fail for + out-of-range inputs. Otherwise, this value must be in the range + `[0, num_buckets)`, and will replace out-of-range inputs. + + Returns: + A `_SequenceCategoricalColumn`. + """ return _SequenceCategoricalColumn( fc.categorical_column_with_identity( key=key, @@ -135,6 +174,46 @@ def sequence_categorical_column_with_identity( def _sequence_embedding_column( categorical_column, dimension, initializer=None, ckpt_to_load_from=None, tensor_name_in_ckpt=None, max_norm=None, trainable=True): + """Returns a feature column that represents sequences of embeddings. + + Use this to convert sequence categorical data into dense representation for + input to sequence NN, such as RNN. + + Example: + + ```python + watches = sequence_categorical_column_with_identity( + 'watches', num_buckets=1000) + watches_embedding = embedding_column(watches, dimension=10) + columns = [watches] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Args: + categorical_column: A `_SequenceCategoricalColumn` created with a + `sequence_cateogrical_column_with_*` function. + dimension: Integer dimension of the embedding. + initializer: Initializer function used to initialize the embeddings. + ckpt_to_load_from: String representing checkpoint name/pattern from which to + restore column weights. Required if `tensor_name_in_ckpt` is not `None`. + tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from + which to restore the column weights. Required if `ckpt_to_load_from` is + not `None`. + max_norm: If not `None`, embedding values are l2-normalized to this value. + trainable: Whether or not the embedding is trainable. Default is True. 
+ + Returns: + A `_SequenceEmbeddingColumn`. + + Raises: + ValueError: If `categorical_column` is not the right type. + """ if not isinstance(categorical_column, _SequenceCategoricalColumn): raise ValueError( 'categorical_column must be of type _SequenceCategoricalColumn. ' @@ -156,6 +235,33 @@ def sequence_numeric_column( shape=(1,), default_value=0., dtype=dtypes.float32): + """Returns a feature column that represents sequences of numeric data. + + Example: + + ```python + temperature = sequence_numeric_column('temperature') + columns = [temperature] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Args: + key: A unique string identifying the input features. + shape: The shape of the input data per sequence id. E.g. if `shape=(2,)`, + each example must contain `2 * sequence_length` values. + default_value: A single value compatible with `dtype` that is used for + padding the sparse data into a dense `Tensor`. + dtype: The type of values. + + Returns: + A `_SequenceNumericColumn`. + """ # TODO(b/73160931): Add validations. 
return _SequenceNumericColumn( key, @@ -202,6 +308,7 @@ class _SequenceCategoricalColumn( fc._CategoricalColumn, collections.namedtuple( '_SequenceCategoricalColumn', ['categorical_column'])): + """Represents sequences of categorical data.""" @property def name(self): @@ -254,6 +361,7 @@ class _SequenceCategoricalColumn( class _SequenceEmbeddingColumn( _SequenceDenseColumn, collections.namedtuple('_SequenceEmbeddingColumn', ['embedding_column'])): + """Represents sequences of embeddings.""" @property def name(self): @@ -287,6 +395,7 @@ class _SequenceNumericColumn( collections.namedtuple( '_SequenceNumericColumn', ['key', 'shape', 'default_value', 'dtype'])): + """Represents sequences of numeric data.""" @property def name(self): @@ -322,4 +431,4 @@ class _SequenceNumericColumn( return _SequenceDenseColumn.TensorSequenceLengthPair( dense_tensor=dense_tensor, sequence_length=sequence_length) -# pylint: enable=g-doc-args,missing-docstring,protected-access +# pylint: enable=protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py similarity index 99% rename from tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py rename to tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 59674869a2..8c37ccf11b 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -20,7 +20,7 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib.feature_column.python.feature_column import sequential_feature_column as sfc +from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc from tensorflow.python.feature_column.feature_column import _LazyBuilder from 
tensorflow.python.framework import dtypes from tensorflow.python.framework import errors -- GitLab From 0a799feaea50d4e48e8daa1f3954427fdccd76f1 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 10:17:15 -0800 Subject: [PATCH 075/311] Generalize the gather_indices dimension that stores indices This is now exposed as a index_vector_dim dimension number. Also fixed an off-by-one error in ValidateGatherDimensionNumbers in the expression computing output_shape_rank. PiperOrigin-RevId: 187040748 --- .../compiler/xla/service/hlo_instruction.cc | 9 +- .../compiler/xla/service/hlo_instruction.h | 3 +- .../xla/service/hlo_instruction_test.cc | 43 +++- .../compiler/xla/service/shape_inference.cc | 42 ++-- .../xla/service/shape_inference_test.cc | 191 ++++++++++++++---- tensorflow/compiler/xla/xla_data.proto | 4 + .../performance/xla/operation_semantics.md | 61 ++++-- 7 files changed, 274 insertions(+), 79 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index b7dd055d7c..a534d8ff06 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1172,7 +1172,8 @@ bool HloInstruction::HasSideEffect() const { /* static */ GatherDimensionNumbers HloInstruction::MakeGatherDimNumbers( tensorflow::gtl::ArraySlice output_window_dims, tensorflow::gtl::ArraySlice elided_window_dims, - tensorflow::gtl::ArraySlice gather_dims_to_operand_dims) { + tensorflow::gtl::ArraySlice gather_dims_to_operand_dims, + int64 index_vector_dim) { GatherDimensionNumbers gather_dim_numbers; for (int64 output_window_dim : output_window_dims) { gather_dim_numbers.add_output_window_dims(output_window_dim); @@ -1184,6 +1185,7 @@ bool HloInstruction::HasSideEffect() const { gather_dim_numbers.add_gather_dims_to_operand_dims(gather_dim_to_input_dim); } + gather_dim_numbers.set_index_vector_dim(index_vector_dim); return gather_dim_numbers; } @@ -3369,9 
+3371,12 @@ string HloInstruction::GatherDimensionNumbersToString() const { string gather_dims_to_operand_dims = StrCat( "gather_dims_to_operand_dims={", Join(gather_dimension_numbers_->gather_dims_to_operand_dims(), ","), "}"); + string index_vector_dim = StrCat( + "index_vector_dim=", gather_dimension_numbers_->index_vector_dim()); return Join>( - {output_window_dims, elided_window_dims, gather_dims_to_operand_dims}, + {output_window_dims, elided_window_dims, gather_dims_to_operand_dims, + index_vector_dim}, ", "); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index e4d22e5703..e4c86214c2 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -502,7 +502,8 @@ class HloInstruction { static GatherDimensionNumbers MakeGatherDimNumbers( tensorflow::gtl::ArraySlice output_window_dims, tensorflow::gtl::ArraySlice elided_window_dims, - tensorflow::gtl::ArraySlice gather_dims_to_operand_dims); + tensorflow::gtl::ArraySlice gather_dims_to_operand_dims, + int64 index_vector_dim); // Returns the opcode for this instruction. 
HloOpcode opcode() const { return opcode_; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 32d3ed272b..f2980d309d 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -1271,7 +1271,7 @@ TEST_F(HloInstructionTest, Stringification) { "true_computation=%TransposeDot, false_computation=%TransposeDot"); } -TEST_F(HloInstructionTest, StringifyGather) { +TEST_F(HloInstructionTest, StringifyGather_0) { Shape input_tensor_shape = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); Shape gather_indices_tensor_shape = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 5}); @@ -1291,7 +1291,8 @@ TEST_F(HloInstructionTest, StringifyGather) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26})); HloModule module(TestName()); @@ -1303,7 +1304,43 @@ TEST_F(HloInstructionTest, StringifyGather) { "s64[10,9,8,7,5]{4,3,2,1,0} %gather_indices), " "output_window_dims={4,5,6,7,8}, elided_window_dims={}, " "gather_dims_to_operand_dims={0,1,2,3,4}, " - "window_bounds={30,29,28,27,26}"); + "index_vector_dim=4, window_bounds={30,29,28,27,26}"); +} + +TEST_F(HloInstructionTest, StringifyGather_1) { + Shape input_tensor_shape = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); + Shape gather_indices_tensor_shape = + ShapeUtil::MakeShape(S64, {10, 9, 5, 7, 6}); + Shape gather_result_shape = + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}); + + HloComputation::Builder builder("Gather"); + HloInstruction* input = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_tensor_shape, "input_tensor")); + HloInstruction* gather_indices = + builder.AddInstruction(HloInstruction::CreateParameter( + 1, 
gather_indices_tensor_shape, "gather_indices")); + + HloInstruction* gather_instruction = + builder.AddInstruction(HloInstruction::CreateGather( + gather_result_shape, input, gather_indices, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/2), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + HloModule module(TestName()); + module.AddEntryComputation(builder.Build()); + + EXPECT_EQ(gather_instruction->ToString(), + "%gather = f32[10,9,7,6,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} " + "gather(f32[50,49,48,47,46]{4,3,2,1,0} %input_tensor, " + "s64[10,9,5,7,6]{4,3,2,1,0} %gather_indices), " + "output_window_dims={4,5,6,7,8}, elided_window_dims={}, " + "gather_dims_to_operand_dims={0,1,2,3,4}, " + "index_vector_dim=2, window_bounds={30,29,28,27,26}"); } } // namespace diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index c9692757b2..607a672025 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -2467,27 +2467,27 @@ static Status ValidateGatherDimensionNumbers( const int64 output_window_dim_count = dim_numbers.output_window_dims_size(); const int64 output_shape_rank = - output_window_dim_count + gather_indices_shape.size(); + output_window_dim_count + gather_indices_shape.size() - 1; for (int i = 0; i < dim_numbers.output_window_dims_size(); ++i) { int64 window_index = dim_numbers.output_window_dims(i); if (window_index < 0 || window_index >= output_shape_rank) { return InvalidArgument( "Window index %d in gather op is out of bounds; got %lld, but should " - "have been in" - "[0,%lld)", + "have been in [0,%lld)", i, window_index, output_shape_rank); } } if (dim_numbers.gather_dims_to_operand_dims_size() != - gather_indices_shape.back()) { + gather_indices_shape[dim_numbers.index_vector_dim()]) { return 
InvalidArgument( - "There must be exactly as many elements in gather_dims_to_operand_dims " - "as there are elements in the last dimension of %%gather_indices; got: " - "%d, expected %lld", + "Gather op has %d elements in gather_dims_to_operand_dims and the " + "bound of dimension index_vector_dim=%lld of gather_indices is " + "%lld. These two numbers must be equal.", dim_numbers.gather_dims_to_operand_dims_size(), - gather_indices_shape.back()); + dim_numbers.index_vector_dim(), + gather_indices_shape[dim_numbers.index_vector_dim()]); } for (int i = 0; i < dim_numbers.gather_dims_to_operand_dims_size(); i++) { @@ -2550,24 +2550,33 @@ static Status ValidateGatherDimensionNumbers( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( gather_indices_shape, "gather indices operand of gather op")); - if (gather_indices_shape.dimensions_size() < 1) { + if (!ShapeUtil::ElementIsIntegral(gather_indices_shape)) { return InvalidArgument( - "Gather indices parameter must at least of rank 1; got %s", + "Gather indices parameter must be an integral tensor; got %s", ShapeUtil::HumanString(gather_indices_shape).c_str()); } - if (!ShapeUtil::ElementIsIntegral(gather_indices_shape)) { + // We implicitly reshape gather indices of shape P[A,B,C] to P[A,B,C,1] if + // index_vector_dim is rank(P). The bounds of this expanded shape is + // stored in expanded_gather_indices_shape. + + if (gather_indices_shape.dimensions_size() < + gather_dim_numbers.index_vector_dim() || + gather_dim_numbers.index_vector_dim() < 0) { return InvalidArgument( - "Gather indices parameter must be an integral tensor; got %s", - ShapeUtil::HumanString(gather_indices_shape).c_str()); + "Gather index leaf dimension must be within [0, rank(gather_indices) + " + "1). 
rank(gather_indices) is %d and gather index leaf dimension is " + "%lld.", + gather_indices_shape.dimensions_size(), + gather_dim_numbers.index_vector_dim()); } std::vector expanded_gather_indices_shape; - // We implicitly reshape gather indices of shape P[N] to P[N,1]. expanded_gather_indices_shape.reserve(gather_indices_shape.dimensions_size()); c_copy(gather_indices_shape.dimensions(), std::back_inserter(expanded_gather_indices_shape)); - if (expanded_gather_indices_shape.size() == 1) { + if (expanded_gather_indices_shape.size() == + gather_dim_numbers.index_vector_dim()) { expanded_gather_indices_shape.push_back(1); } @@ -2632,6 +2641,9 @@ static Status ValidateGatherDimensionNumbers( } current_bound = window_bounds[window_dims_seen++]; } else { + if (gather_dims_seen == gather_dim_numbers.index_vector_dim()) { + gather_dims_seen++; + } current_bound = expanded_gather_indices_shape[gather_dims_seen++]; } diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index 7eb120843f..029d2b3b86 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -1530,11 +1530,17 @@ TEST_F(ShapeInferenceTest, BadSlice) { class GatherShapeInferenceTest : public ShapeInferenceTest { protected: + const Shape s64_scalar_ = ShapeUtil::MakeShape(S64, {}); + const Shape s64_vector_5_ = ShapeUtil::MakeShape(S64, {5}); const Shape s64_vector_32_ = ShapeUtil::MakeShape(S64, {32}); const Shape s64_4d_tensor_10_9_8_7_1_ = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 1}); const Shape s64_4d_tensor_10_9_8_7_5_ = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 5}); + const Shape s64_4d_tensor_5_10_9_7_6_ = + ShapeUtil::MakeShape(S64, {5, 10, 9, 7, 6}); + const Shape s64_4d_tensor_10_9_5_7_6_ = + ShapeUtil::MakeShape(S64, {10, 9, 5, 7, 6}); const Shape f32_5d_tensor_50_49_48_47_46_ = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); const Shape tuple_shape_ = 
ShapeUtil::MakeTupleShape( @@ -1548,7 +1554,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGather) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/1), /*window_bounds=*/{64, 1})); EXPECT_TRUE( ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {64, 32}))) @@ -1562,7 +1569,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGatherV2) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{1}, /*elided_window_dims=*/{0}, - /*gather_dims_to_operand_dims=*/{0}), + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/1), /*window_bounds=*/{1, 48})); EXPECT_TRUE( ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {32, 48}))) @@ -1576,7 +1584,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGatherNd) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4}, /*elided_window_dims=*/{0}, - /*gather_dims_to_operand_dims=*/{0}), + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/4), /*window_bounds=*/{1, 48})); EXPECT_TRUE(ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {10, 9, 8, 7, 48}))) @@ -1591,7 +1600,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowBatchDynamicSlice) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26})); EXPECT_TRUE(ShapeUtil::Equal( gather_shape, @@ -1599,12 +1609,85 @@ TEST_F(GatherShapeInferenceTest, TensorFlowBatchDynamicSlice) { << ShapeUtil::HumanString(gather_shape); } +TEST_F(GatherShapeInferenceTest, NonDefaultGatherIndicesLeafDim_A) { + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_5_7_6_, + HloInstruction::MakeGatherDimNumbers( + 
/*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/2), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal( + gather_shape, + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, NonDefaultGatherIndicesLeafDim_B) { + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_5_10_9_7_6_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/0), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal( + gather_shape, + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, NoOutputGatherDims) { + // This is equivalent to a dynamic slice. + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_vector_5_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{0, 1, 2, 3, 4}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/0), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal(gather_shape, + ShapeUtil::MakeShape(F32, {30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, ScalarGatherIndices) { + // The gather indices "tensor" is a scalar S here that's used to slice out + // [S,0,0,0,0]..[S,30,29,28,27] into a [30,29,28,27] shaped result. 
+ TF_ASSERT_OK_AND_ASSIGN(Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_scalar_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{0, 1, 2, 3}, + /*elided_window_dims=*/{0}, + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/0), + /*window_bounds=*/{1, 30, 29, 28, 27})); + + EXPECT_TRUE(ShapeUtil::Equal(gather_shape, + ShapeUtil::MakeShape(F32, {30, 29, 28, 27}))) + << ShapeUtil::HumanString(gather_shape); +} + TEST_F(GatherShapeInferenceTest, TupleShapedTensorInput) { StatusOr statusor = ShapeInference::InferGatherShape( tuple_shape_, s64_vector_32_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/1), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1617,7 +1700,8 @@ TEST_F(GatherShapeInferenceTest, TupleShapedGatherIndicesInput) { s64_vector_32_, tuple_shape_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/0), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1625,25 +1709,13 @@ TEST_F(GatherShapeInferenceTest, TupleShapedGatherIndicesInput) { << statusor.status(); } -TEST_F(GatherShapeInferenceTest, ScalarGatherIndicesInput) { - StatusOr statusor = ShapeInference::InferGatherShape( - s64_vector_32_, s32_, - HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, - /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), - /*window_bounds=*/{64, 1}); - ASSERT_FALSE(statusor.ok()); - EXPECT_THAT(statusor.status().error_message(), - HasSubstr("Gather indices parameter must at least of rank 1")) - << statusor.status(); -} - TEST_F(GatherShapeInferenceTest, 
FloatingPointGatherIndicesInput) { StatusOr statusor = ShapeInference::InferGatherShape( s64_vector_32_, vector_32_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/0), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1658,7 +1730,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 8, 7}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1674,7 +1747,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 7}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1690,7 +1764,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 99, 100, 101}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1698,6 +1773,22 @@ TEST_F(GatherShapeInferenceTest, << statusor.status(); } +TEST_F(GatherShapeInferenceTest, + InvalidGatherDimNumbers_WindowIndexBarelyOutOfBounds) { + StatusOr statusor = ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_8_7_5_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 9}, + /*elided_window_dims=*/{}, + 
/*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), + /*window_bounds=*/{30, 29, 28, 27, 26}); + ASSERT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("Window index 4 in gather op is out of bounds")) + << statusor.status(); +} + TEST_F(GatherShapeInferenceTest, InvalidGatherDimNumbers_MismatchingElidedWindowDims) { StatusOr statusor = ShapeInference::InferGatherShape( @@ -1705,7 +1796,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{4}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1722,7 +1814,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{0, 1, 2, 3, 19}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1738,7 +1831,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{0, 1, 2, 3, 3}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1755,15 +1849,15 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( 
statusor.status().error_message(), - HasSubstr( - "There must be exactly as many elements in " - "gather_dims_to_operand_dims " - "as there are elements in the last dimension of %gather_indices")) + HasSubstr("Gather op has 4 elements in gather_dims_to_operand_dims and " + "the bound of dimension index_vector_dim=4 of " + "gather_indices is 5. These two numbers must be equal.")) << statusor.status(); } @@ -1774,7 +1868,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 7}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 7}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1791,7 +1886,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 3}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 3}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1808,7 +1904,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{2, 1}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{1, 1, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1822,7 +1919,8 @@ TEST_F(GatherShapeInferenceTest, InvalidGatherDimNumbers_WindowBoundsTooLarge) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7}, /*elided_window_dims=*/{2}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 1, 300, 26}); ASSERT_FALSE(statusor.ok()); 
EXPECT_THAT(statusor.status().error_message(), @@ -1838,7 +1936,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1855,7 +1954,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 26, 20}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1864,5 +1964,22 @@ TEST_F(GatherShapeInferenceTest, << statusor.status(); } +TEST_F(GatherShapeInferenceTest, OutOfBoundsGatherIndicesLeafDim) { + StatusOr statusor = ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_5_7_6_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/32), + /*window_bounds=*/{30, 29, 28, 27, 26}); + + ASSERT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("Gather index leaf dimension must be within [0, " + "rank(gather_indices) + 1)")) + << statusor.status(); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 28620c3b86..1f16e6d251 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -418,6 +418,10 @@ message GatherDimensionNumbers { // transforms the gather index looked up from the gather_indices tensor into // the starting index in the input space. 
repeated int64 gather_dims_to_operand_dims = 3; + + // The dimension in the gather_indices input that contains the starting + // indices. + int64 index_vector_dim = 4; } // Operation requests that are all collected as a tagged union with a oneof diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index b0abf5fdd2..b2190c5243 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -1050,6 +1050,9 @@ For a more intuitive description, see the "Informal Description" section below. : : : indices of the slices we're : : : : we're stitching together into : : : : the output tensor. : +|`index_vector_dim` | `int64` | The dimension in | +: : : `gather_indices` that contains : +: : : the starting indices. : |`output_window_dims` | `ArraySlice` | The set of dimensions in the | : : : output shape that are _window : : : : dimensions_ (defined below). : @@ -1066,22 +1069,20 @@ For a more intuitive description, see the "Informal Description" section below. : : : `output_window_dims`) and the window : : : : dimensions that are elided (via : : : : `elided_window_dims`). : -|`gather_dims_to_operand_dims` | `ArraySlice` | A dimension map (the | +|`gather_dims_to_operand_dims` | `ArraySlice` | A dimension map (the | : : : array is interpreted as mapping `i` to : : : : `gather_dims_to_operand_dims[i]`) from : : : : the gather indices in `gather_indices` to : : : : the operand index space. It has to be : : : : one-to-one and total. : -If `gather_indices` is a vector with `N` elements then we implicitly reshape it -to a tensor of shape `[N,1]` before proceeding. 
- For every index `Out` in the output tensor, we compute two things (more precisely described later): - - An index into the first `gather_indices.rank` - `1` dimensions of - `gather_indices`, which gives us a starting index of a slice, _operand - slice_, in the operand tensor. + - An index into `gather_indices.rank` - `1` dimensions of `gather_indices`, + which gives us a starting index of a slice, _operand slice_, in the operand + tensor. These `gather_indices.rank` - `1` dimensions are all the dimensions + in `gather_indices` except `index_vector_dim`. - A _window index_ that has the same rank as the operand. This index is composed of the values in `Out` at dimensions `output_window_dims`, embedded @@ -1093,29 +1094,42 @@ should be present in the output at index `Out`. The output is a tensor of rank `output_window_dims.size` + `gather_indices.rank` - `1`. Additionally, as a shorthand, we define `output_gather_dims` of type `ArraySlice` as the set of dimensions in the output shape but not in -`output_window_dims`, in ascending order. E.g. if the output tensor has rank 5, -`output_window_dims` is {`2`, `4`} then `output_gather_dims` is {`0`, `1`, `3`} +`output_window_dims`, in ascending order. E.g. if the output tensor has rank +`5`, `output_window_dims` is {`2`, `4`} then `output_gather_dims` is {`0`, `1`, +`3`} + +If `index_vector_dim` is equal to `gather_indices.rank` we implicitly +consider `gather_indices` to have a trailing `1` dimension (i.e. if +`gather_indices` was of shape `[6,7]` and `index_vector_dim` is `2` then +we implicitly consider the shape of `gather_indices` to be `[6,7,1]`). The bounds for the output tensor along dimension `i` is computed as follows: 1. If `i` is present in `output_gather_dims` (i.e. is equal to - `output_gather_dims[k]` for some `k`) then we pick the corresponding - dimension bounds out of `gather_indices.shape` (i.e. pick - `gather_indices.shape.dims[k]`). 
+ `output_gather_dims[k]` for some `k`) then we pick the corresponding + dimension bounds out of `gather_indices.shape`, skipping + `index_vector_dim` (i.e. pick `gather_indices.shape.dims`[`k`] if `k` + < `index_vector_dim` and `gather_indices.shape.dims`[`k`+`1`] + otherwise). 2. If `i` is present in `output_window_dims` (i.e. equal to - `output_window_dims[k]` for some `k`) then we pick the corresponding bound - out of `window_bounds` after accounting for `elided_window_dims` (i.e. we - pick `adjusted_window_bounds[k]` where `adjusted_window_bounds` is - `window_bounds` with the bounds at indices `elided_window_dims` removed). + `output_window_dims`[`k`] for some `k`) then we pick the corresponding + bound out of `window_bounds` after accounting for `elided_window_dims` + (i.e. we pick `adjusted_window_bounds`[`k`] where `adjusted_window_bounds` + is `window_bounds` with the bounds at indices `elided_window_dims` + removed). The operand index `In` corresponding to an output index `Out` is computed as follows: 1. Let `G` = { `Out`[`k`] for `k` in `output_gather_dims` }. Use `G` to slice - out vector `S` such that `S`[`i`] = `gather_indices`[`G`, `i`]. - 2. Create an index, `S``in`, into `operand` using `S` by scattering - `S` using the `gather_dims_to_operand_dims` map (`S``in` is the - starting indices for _operand slice_ mentioned above.). More precisely: + out vector `S` such that `S`[`i`] = `gather_indices`[Combine(`G`, `i`)] + where Combine(A, b) inserts b at position `index_vector_dim` into A. + Note that this is well defined even if `G` is empty -- if `G` is empty then + `S` = `gather_indices`. + 2. Create an index, `S``in`, into `operand` using `S` by + scattering `S` using the `gather_dims_to_operand_dims` map + (`S``in` is the starting indices for _operand slice_ mentioned + above). More precisely: 1. `S``in`[`gather_dims_to_operand_dims`[`k`]] = `S`[`k`] if `k` < `gather_dims_to_operand_dims.size`. 2. `S``in`[`_`] = `0` otherwise. 
@@ -1136,7 +1150,12 @@ follows: `operand.rank` is `6` and `elided_window_dims` is {`0`, `2`} then `window_dims_to_operand_dims` is {`0`→`1`, `1`→`3`, `2`→`4`, `3`→`5`}. -### Informal Description +### Informal Description and Examples + +`index_vector_dim` is set to `gather_indices.rank` - `1` in all of the +examples that follow. More interesting values for `index_vector_dim` +does not change the operation fundamentally, but makes the visual representation +more cumbersome. To get an intuition on how all of the above fits together, let's look at an example that gathers 5 slices of shape `[8,6]` from a `[16,11]` tensor. The -- GitLab From 1fc324c6701bc179ca73908731857e8a582437b5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 10:24:08 -0800 Subject: [PATCH 076/311] Arithemtic optimization: Rewite Sub(0, y) => Neg(y) PiperOrigin-RevId: 187041872 --- .../grappler/optimizers/constant_folding.cc | 18 +++++++++++++++++- .../grappler/optimizers/constant_folding.h | 1 + .../optimizers/constant_folding_test.cc | 7 +++---- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 182e03f04e..10ca7dcce0 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1434,6 +1434,17 @@ void ConstantFolding::ReplaceDivisionOfOnesByReciprocal(NodeDef* node, graph_modified_ = true; } +void ConstantFolding::ReplaceSubtractionFromZeroByNegation(NodeDef* node, + GraphDef* graph) { + node->set_op("Neg"); + node->mutable_input()->SwapElements(0, 1); + const string ctrl_dep = + AddControlDependency(node->input(1), graph, node_map_.get()); + node_map_->UpdateInput(node->name(), node->input(1), ctrl_dep); + node->set_input(1, ctrl_dep); + graph_modified_ = true; +} + Status ConstantFolding::ReplaceOperationWithConstant( double value, const TensorShapeProto& shape, 
NodeDef* node, GraphDef* graph) { @@ -1636,12 +1647,17 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape); if (y_matches_output_shape && ((is_mul && x_is_one) || (is_add && x_is_zero))) { - // TODO(rmlarsen): Handle subtraction 0 - y. // 1 * y = y or 0 + y = y. ReplaceOperationWithSnapshot(1, node, output); continue; } + if (y_matches_output_shape && (is_sub && x_is_zero)) { + // Replace 0 - y with Neg(y). + ReplaceSubtractionFromZeroByNegation(node, output); + continue; + } + // Replace 1 / y with Reciprocal op. if (y_matches_output_shape && is_any_div && x_is_one) { DataType type = node->attr().at("T").type(); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 232b2f9fa0..2fd59c7f9c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -82,6 +82,7 @@ class ConstantFolding : public GraphOptimizer { GraphDef* graph); void ReplaceOperationWithSnapshot(int input_to_forward, NodeDef* node, GraphDef* graph); + void ReplaceSubtractionFromZeroByNegation(NodeDef* node, GraphDef* graph); Status ReplaceOperationWithConstant(double value, const TensorShapeProto& shape, NodeDef* node, GraphDef* graph); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 219f3bd5ec..c6540192d7 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -286,10 +286,9 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^zeros", node.input(1)); } else if (name == "sub2") { - // We don't handle this case yet. 
- EXPECT_EQ("Sub", node.op()); - EXPECT_EQ("zeros", node.input(0)); - EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("Neg", node.op()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("^zeros", node.input(1)); } const std::set square_zero_const{"mul1", "mul2", "mul5", "mul6", "matmul1", "matmul2"}; -- GitLab From 620348fb6d045dc1f644925a3828ebb12de944d7 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 10:24:56 -0800 Subject: [PATCH 077/311] Move accumulate_n_v2 to core. PiperOrigin-RevId: 187042001 --- tensorflow/contrib/framework/BUILD | 38 ------ .../framework/python/ops/accumulate_n_v2.py | 111 ------------------ tensorflow/python/kernel_tests/BUILD | 34 ++++++ .../kernel_tests/accumulate_n_eager_test.py} | 27 ++--- .../kernel_tests/accumulate_n_test.py} | 34 +++--- tensorflow/python/ops/math_ops.py | 81 ++++++------- 6 files changed, 99 insertions(+), 226 deletions(-) delete mode 100644 tensorflow/contrib/framework/python/ops/accumulate_n_v2.py rename tensorflow/{contrib/framework/python/ops/accumulate_n_v2_eager_test.py => python/kernel_tests/accumulate_n_eager_test.py} (72%) rename tensorflow/{contrib/framework/python/ops/accumulate_n_v2_test.py => python/kernel_tests/accumulate_n_test.py} (79%) diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index dbdb5cfaac..1accb319d2 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -28,7 +28,6 @@ tf_custom_op_py_library( "python/framework/graph_util.py", "python/framework/tensor_util.py", "python/ops/__init__.py", - "python/ops/accumulate_n_v2.py", "python/ops/arg_scope.py", "python/ops/audio_ops.py", "python/ops/checkpoint_ops.py", @@ -161,23 +160,6 @@ py_test( ], ) -py_test( - name = "accumulate_n_v2_test", - size = "small", - srcs = ["python/ops/accumulate_n_v2_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":framework_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", 
- "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:platform_test", - "//tensorflow/python:variables", - "//third_party/py/numpy", - ], -) - cuda_py_test( name = "critical_section_test", size = "medium", @@ -196,26 +178,6 @@ cuda_py_test( ], ) -py_test( - name = "accumulate_n_v2_eager_test", - size = "small", - srcs = ["python/ops/accumulate_n_v2_eager_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":framework_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python/eager:backprop", - "//tensorflow/python/eager:context", - "//tensorflow/python/eager:tape", - "//third_party/py/numpy", - ], -) - py_test( name = "ops_test", size = "small", diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py deleted file mode 100644 index 476528b0dd..0000000000 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Ops that will eventually be folded into tensorflow/python/ops/math_ops.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -from tensorflow.python.eager import context -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_math_ops -from tensorflow.python.ops import math_ops - - - -def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None): - """Returns the element-wise sum of a list of tensors. - - Optionally, pass `shape` and `tensor_dtype` for shape and type checking, - otherwise, these are inferred. - - `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not - wait for all of its inputs to be ready before beginning to sum. This can - save memory if inputs are ready at different times, since minimum temporary - storage is proportional to the output size rather than the inputs size. - - Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. - - For example: - - ```python - a = tf.constant([[1, 2], [3, 4]]) - b = tf.constant([[5, 0], [0, 6]]) - tf.accumulate_n_v2([a, b, a]) # [[7, 4], [6, 14]] - - # Explicitly pass shape and type - tf.accumulate_n_v2([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) - # [[7, 4], - # [6, 14]] - ``` - - Args: - inputs: A list of `Tensor` objects, each with same shape and type. - shape: Shape of elements of `inputs`. - tensor_dtype: The type of `inputs`. - name: A name for the operation (optional). - - Returns: - A `Tensor` of same shape and type as the elements of `inputs`. - - Raises: - ValueError: If `inputs` don't all have same shape and dtype or the shape - cannot be inferred. 
- """ - _INPUTS_ERR_MSG = ValueError("inputs must be a list of at least one Tensor" - "with the same dtype and shape") - if not inputs or not isinstance(inputs, (list, tuple)): - raise _INPUTS_ERR_MSG - inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) - if not all(isinstance(x, ops.Tensor) for x in inputs): - raise _INPUTS_ERR_MSG - if not all(x.dtype == inputs[0].dtype for x in inputs): - raise _INPUTS_ERR_MSG - if shape is not None: - shape = tensor_shape.as_shape(shape) - else: - shape = tensor_shape.unknown_shape() - for input_tensor in inputs: - if isinstance(input_tensor, ops.Tensor): - shape = shape.merge_with(input_tensor.get_shape()) - - # tensor_dtype is for safety only; operator's output type computed in C++ - if tensor_dtype is not None and tensor_dtype != inputs[0].dtype: - raise TypeError("tensor_dtype is {}, but input is of type {}" - .format(tensor_dtype, inputs[0].dtype)) - - if len(inputs) == 1 and name is None: - return inputs[0] - elif len(inputs) == 1 and name is not None: - return array_ops.identity(inputs[0], name=name) - elif context.in_eager_mode(): - # TemporaryVariable not currently supported in eager mode; fall back - # onto AddN for now. - # TODO(frreiss) remove this once the lifetime of eager variables gets - # addressed - return math_ops.add_n(inputs, name=name) - else: - return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) - -# The following code should eventually be merged into -# tensorflow/python/ops/math_grad.py -@ops.RegisterGradient("AccumulateNV2") -def _AddNGrad(op, grad): - """Same as gradient for AddN. Copies the gradient to all inputs.""" - # Not broadcasting. 
- return [grad] * len(op.inputs) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index d4ceb2e489..c9aa4a252d 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2892,6 +2892,40 @@ tf_py_test( ], ) +tf_py_test( + name = "accumulate_n_test", + size = "small", + srcs = ["accumulate_n_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:variables", + ], +) + +tf_py_test( + name = "accumulate_n_eager_test", + size = "small", + srcs = ["accumulate_n_eager_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:tape", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py b/tensorflow/python/kernel_tests/accumulate_n_eager_test.py similarity index 72% rename from tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py rename to tensorflow/python/kernel_tests/accumulate_n_eager_test.py index 35974b9e21..dc11b7dece 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_eager_test.py @@ -12,48 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for new version of accumulate_n op that will eventually go into -`ops.math_ops`. - -These test cases spefically exercise the `eager` APIs. They need to be in a -separate file from the remaining tests because eager mode is currently something -you can turn on but can't turn off for the lifetime of the current process.""" +"""Tests for new version of accumulate_n op.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np -from tensorflow.contrib.framework.python.ops import accumulate_n_v2 as av2 - from tensorflow.python.eager import backprop from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test - class AccumulateNV2EagerTest(test_util.TensorFlowTestCase): - """Tests of the new, differentiable version of accumulate_n""" + """Tests of the new, differentiable version of accumulate_n.""" def testMinimalEagerMode(self): forty = constant_op.constant(40) two = constant_op.constant(2) - answer = av2.accumulate_n_v2([forty, two]) + answer = math_ops.accumulate_n([forty, two]) self.assertEqual(42, answer.numpy()) - def testFloat(self): np.random.seed(12345) x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllClose(sum(x), av2.accumulate_n_v2(tf_x).numpy()) - self.assertAllClose(x[0] * 5, av2.accumulate_n_v2([tf_x[0]] * 5).numpy()) + self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).numpy()) + self.assertAllClose(x[0] * 5, + math_ops.accumulate_n([tf_x[0]] * 5).numpy()) def testGrad(self): np.random.seed(42) @@ -65,16 +58,14 @@ class 
AccumulateNV2EagerTest(test_util.TensorFlowTestCase): ] def fn(first, second, third): - return av2.accumulate_n_v2([first, second, third]) + return math_ops.accumulate_n([first, second, third]) grad_fn = backprop.gradients_function(fn) grad = grad_fn(input_vars[0], input_vars[1], input_vars[2]) - self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1 + self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1 [elem.numpy() for elem in grad]) - if __name__ == "__main__": ops.enable_eager_execution() test.main() - diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py b/tensorflow/python/kernel_tests/accumulate_n_test.py similarity index 79% rename from tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py rename to tensorflow/python/kernel_tests/accumulate_n_test.py index 45962098e9..0a6d4aea37 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_test.py @@ -12,42 +12,42 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for new version of accumulate_n op that will eventually go into -`ops.math_ops`.""" +"""Tests for new version of accumulate_n op.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np -from tensorflow.contrib.framework.python.ops import accumulate_n_v2 as av2 - from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import googletest class AccumulateNV2Test(test_util.TensorFlowTestCase): - """Tests of the new, differentiable version of accumulate_n""" + """Tests of the new, differentiable version of accumulate_n.""" def testFloat(self): np.random.seed(12345) x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllClose(sum(x), av2.accumulate_n_v2(tf_x).eval()) - self.assertAllClose(x[0] * 5, av2.accumulate_n_v2([tf_x[0]] * 5).eval()) + self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).eval()) + self.assertAllClose(x[0] * 5, + math_ops.accumulate_n([tf_x[0]] * 5).eval()) def testInt(self): np.random.seed(54321) x = [np.random.randint(-128, 128, (5, 4, 3, 2, 1)) for _ in range(6)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllEqual(sum(x), av2.accumulate_n_v2(tf_x).eval()) - self.assertAllEqual(x[0] * 6, av2.accumulate_n_v2([tf_x[0]] * 6).eval()) + self.assertAllEqual(sum(x), math_ops.accumulate_n(tf_x).eval()) + self.assertAllEqual(x[0] * 6, + math_ops.accumulate_n([tf_x[0]] * 6).eval()) def testGrad(self): np.random.seed(42) @@ -55,9 +55,9 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): with 
self.test_session(use_gpu=True) as sess: input_vars = [ variables.Variable(10.0 * np.random.random()) - for i in range(0, num_inputs) + for _ in range(0, num_inputs) ] - accum_n = av2.accumulate_n_v2(input_vars) + accum_n = math_ops.accumulate_n(input_vars) sess.run(variables.global_variables_initializer()) accum_n_grad = gradients.gradients(accum_n, input_vars) self.assertAllEqual( @@ -77,7 +77,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): ops.convert_to_tensor(x, dtype=dtypes_lib.float32) for x in random_arrays ] - tf_val = av2.accumulate_n_v2(random_tensors) + tf_val = math_ops.accumulate_n(random_tensors) np_val = random_arrays[0] for random_array in random_arrays[1:]: np_val += random_array @@ -86,7 +86,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): def testZeroArgs(self): with self.test_session(): with self.assertRaises(ValueError): - tf_val = av2.accumulate_n_v2([]) + tf_val = math_ops.accumulate_n([]) tf_val.eval() def testWrongShape(self): @@ -94,28 +94,28 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): a = variables.Variable(0.2) b = variables.Variable(0.1) - tf_val = av2.accumulate_n_v2([a, b], shape=[2, 2]) # Should be shape=[] + math_ops.accumulate_n([a, b], shape=[2, 2]) # Should be shape=[] def testIncompatibleShapes(self): with self.test_session(): with self.assertRaises(ValueError): a = variables.Variable(np.array([0.1, 0.2])) b = variables.Variable(np.array([[0.3], [0.4]])) - tf_val = av2.accumulate_n_v2([a, b]) + math_ops.accumulate_n([a, b]) def testWrongType(self): with self.test_session(): with self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) b = variables.Variable(0.1, dtype=np.float32) - tf_val = av2.accumulate_n_v2([a, b], tensor_dtype=np.int32) + math_ops.accumulate_n([a, b], tensor_dtype=np.int32) def testWrongTypeOneInput(self): # Scenario that used to trigger a bug, even when testWrongType() worked with self.test_session(): with 
self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) - tf_val = av2.accumulate_n_v2([a], tensor_dtype=np.int32) + math_ops.accumulate_n([a], tensor_dtype=np.int32) if __name__ == "__main__": diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 2ae8b610da..ed11fe5348 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -161,14 +161,11 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_control_flow_ops from tensorflow.python.ops import gen_data_flow_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gen_sparse_ops from tensorflow.python.ops import gen_spectral_ops -from tensorflow.python.ops import gen_state_ops -from tensorflow.python.ops import state_ops # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_math_ops import * @@ -2218,14 +2215,12 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): Optionally, pass `shape` and `tensor_dtype` for shape and type checking, otherwise, these are inferred. - NOTE: This operation is not differentiable and cannot be used if inputs depend - on trainable variables. Please use `tf.add_n` for such cases. + `tf.accumulate_n` performs the same operation as `tf.add_n`, but does not + wait for all of its inputs to be ready before beginning to sum. This can + save memory if inputs are ready at different times, since minimum temporary + storage is proportional to the output size rather than the inputs size. - Aside from differentiability, `tf.accumulate_n` performs the same operation as - `tf.add_n`, but does not wait for all of its inputs to be ready before - beginning to sum. 
This can save memory if inputs are ready at different times, - since minimum temporary storage is proportional to the output size rather than - the inputs size. + `accumulate_n` is differentiable (but wasn't previous to TensorFlow 1.7). For example: @@ -2235,8 +2230,9 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): tf.accumulate_n([a, b, a]) # [[7, 4], [6, 14]] # Explicitly pass shape and type - tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) # [[7, 4], - # [6, 14]] + tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) + # [[7, 4], + # [6, 14]] ``` Args: @@ -2252,20 +2248,17 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): ValueError: If `inputs` don't all have same shape and dtype or the shape cannot be inferred. """ - if context.in_eager_mode(): - # TODO(apassos) remove this once the lifetime of eager variables gets - # addressed. - raise ValueError("accumulate_n not supported in eager mode") + def _input_error(): + return ValueError( + "inputs must be a list of at least one Tensor with the " + "same dtype and shape") if not inputs or not isinstance(inputs, (list, tuple)): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) if not all(isinstance(x, ops.Tensor) for x in inputs): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() if not all(x.dtype == inputs[0].dtype for x in inputs): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() if shape is not None: shape = tensor_shape.as_shape(shape) else: @@ -2273,27 +2266,31 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): for input_tensor in inputs: if isinstance(input_tensor, ops.Tensor): shape = shape.merge_with(input_tensor.get_shape()) - if 
tensor_dtype is None: - tensor_dtype = inputs[0].dtype - if tensor_dtype != inputs[0].dtype: - raise TypeError("tensor_dtype is {}, but input is of type {}".format( - tensor_dtype, inputs[0].dtype)) - if len(inputs) == 1: + + # tensor_dtype is for safety only; operator's output type computed in C++ + if tensor_dtype is not None and tensor_dtype != inputs[0].dtype: + raise TypeError("tensor_dtype is {}, but input is of type {}" + .format(tensor_dtype, inputs[0].dtype)) + + if len(inputs) == 1 and name is None: return inputs[0] - with ops.name_scope(name, "AccumulateN", inputs) as name: - var = gen_state_ops._temporary_variable( - shape=tensor_shape.vector(0), dtype=tensor_dtype) - with ops.colocate_with(var): - zeros = array_ops.zeros_like(gen_control_flow_ops._merge(inputs)[0]) - zeros.set_shape(shape) - ref = state_ops.assign(var, zeros, validate_shape=False) - update_ops = [ - state_ops.assign_add(ref, input_tensor, use_locking=True) - for input_tensor in inputs - ] - with ops.control_dependencies(update_ops): - return gen_state_ops._destroy_temporary_variable( - ref, var_name=var.op.name, name=name) + elif len(inputs) == 1 and name is not None: + return array_ops.identity(inputs[0], name=name) + elif context.in_eager_mode(): + # TemporaryVariable not currently supported in eager mode; fall back + # onto AddN for now. + # TODO(frreiss) remove this once the lifetime of eager variables gets + # addressed + return add_n(inputs, name=name) + else: + return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) # pylint: disable=protected-access + + +@ops.RegisterGradient("AccumulateNV2") +def _accumulate_n_grad(op, grad): + """Same as gradient for AddN. Copies the gradient to all inputs.""" + # Not broadcasting. 
+ return [grad] * len(op.inputs) @tf_export("nn.sigmoid", "sigmoid") -- GitLab From feeb6c095ffa15b555298122840f0542ee986eac Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 26 Feb 2018 10:41:44 -0800 Subject: [PATCH 078/311] Deleting references to outdated `translate/seq2seq` tutorial. PiperOrigin-RevId: 187044697 --- tensorflow/tools/ci_build/builds/test_tutorials.sh | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/test_tutorials.sh b/tensorflow/tools/ci_build/builds/test_tutorials.sh index 67e5af5564..db335f14ca 100755 --- a/tensorflow/tools/ci_build/builds/test_tutorials.sh +++ b/tensorflow/tools/ci_build/builds/test_tutorials.sh @@ -277,17 +277,6 @@ test_ptb_word_lm() { fi } - -# ----------------------------------------------------------- -# translate_test -test_translate_test() { - LOG_FILE=$1 - - run_in_directory "${TEST_DIR}" "${LOG_FILE}" \ - "${TF_MODELS_DIR}/tutorials/rnn/translate/translate.py" --self_test=True -} - - # Run the tutorial tests test_runner "tutorial test-on-install" \ "${TUT_TESTS}" "${TF_BUILD_TUT_TEST_BLACKLIST}" "${LOGS_DIR}" -- GitLab From f487340e7628802b1b8c3b12747f3b9ce9254af3 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Mon, 26 Feb 2018 10:42:59 -0800 Subject: [PATCH 079/311] [XLA] Add kConvert to EffectiveOperandPrecisionIsOutputPrecision list. 
PiperOrigin-RevId: 187044921 --- tensorflow/compiler/xla/service/bfloat16_support.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/service/bfloat16_support.cc b/tensorflow/compiler/xla/service/bfloat16_support.cc index 3fd9e24601..07b4b14b5e 100644 --- a/tensorflow/compiler/xla/service/bfloat16_support.cc +++ b/tensorflow/compiler/xla/service/bfloat16_support.cc @@ -79,6 +79,7 @@ bool BFloat16Support::EffectiveOperandPrecisionIsOutputPrecision( case HloOpcode::kBroadcast: case HloOpcode::kClamp: case HloOpcode::kConcatenate: + case HloOpcode::kConvert: case HloOpcode::kCopy: case HloOpcode::kGetTupleElement: case HloOpcode::kMaximum: -- GitLab From c6312773dd5473fb47f73c88c2f5c8f41e20c0fa Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 26 Feb 2018 10:52:05 -0800 Subject: [PATCH 080/311] [XLA] Do not recompute flattened sets inside layout assignment. Cache the flattened sets instead of recomputing them. This matters for large graphs, since we may request the flattened set thousands of times on the same instruction, and it may be fairly expensive to construct for large tuples. 
PiperOrigin-RevId: 187046642 --- .../compiler/xla/service/layout_assignment.cc | 31 ++++++++++++++----- .../compiler/xla/service/layout_assignment.h | 10 ++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 0668f66051..4929300f7d 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -192,17 +192,34 @@ LayoutConstraints::LayoutConstraints( } } +PointsToSet::BufferSet* LayoutConstraints::GetBufferSet( + const HloInstruction* instruction) const { + auto it = buffer_sets_cache_.find(instruction); + if (it != buffer_sets_cache_.end()) { + return it->second.get(); + } + auto& buffer_set = + buffer_sets_cache_ + .emplace(instruction, MakeUnique()) + .first->second; + const auto& points_to_set = points_to_analysis_.GetPointsToSet(instruction); + points_to_set.ForEachElement( + [&buffer_set](const ShapeIndex& /*index*/, + const PointsToSet::BufferList& buffers) { + buffer_set->insert(buffers.begin(), buffers.end()); + }); + return buffer_set.get(); +} + bool LayoutConstraints::OperandBufferForwarded( const HloInstruction* instruction, int64 operand_no) const { // The operand is potentially forwarded if the intersection of points-to sets // of the operand and the instruction is non-empty. 
- auto output_buffers = - points_to_analysis_.GetPointsToSet(instruction).CreateFlattenedSet(); - auto operand_buffers = - points_to_analysis_.GetPointsToSet(instruction->operand(operand_no)) - .CreateFlattenedSet(); - for (const LogicalBuffer* output_buffer : output_buffers) { - if (operand_buffers.count(output_buffer) > 0) { + PointsToSet::BufferSet* output_buffers = GetBufferSet(instruction); + PointsToSet::BufferSet* operand_buffers = + GetBufferSet(instruction->operand(operand_no)); + for (const LogicalBuffer* output_buffer : *output_buffers) { + if (operand_buffers->count(output_buffer) > 0) { return true; } } diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index 2901858448..7126cb50cf 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -38,6 +38,7 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -199,6 +200,11 @@ class LayoutConstraints { string ToString() const; private: + // Find a bufferset in the bufferset cache. This is useful since we can + // currently create the flattened buffer set for the same instruction many + // times, which is often slow. + PointsToSet::BufferSet* GetBufferSet(const HloInstruction* instruction) const; + // The set of BufferLayoutConstraints applied to the computation. std::unordered_map buffer_constraints_; @@ -221,6 +227,10 @@ class LayoutConstraints { // Array-shaped buffers which have not yet been constrained. 
std::set unconstrained_buffer_ids_; + mutable tensorflow::gtl::FlatMap> + buffer_sets_cache_; + HloComputation* computation_; }; -- GitLab From 616de9709cbd1ec2b06a036db628bed04b143560 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 10:54:31 -0800 Subject: [PATCH 081/311] Integrate ClusterResolvers with TPUEstimator. PiperOrigin-RevId: 187047094 --- tensorflow/contrib/cluster_resolver/BUILD | 1 + .../python/training/cluster_resolver.py | 23 +- .../python/training/cluster_resolver_test.py | 2 + .../python/training/gce_cluster_resolver.py | 3 + .../python/training/tpu_cluster_resolver.py | 150 +++++++++--- .../training/tpu_cluster_resolver_test.py | 226 +++++++++++++----- .../contrib/tpu/python/tpu/tpu_config.py | 31 +++ 7 files changed, 345 insertions(+), 91 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/BUILD b/tensorflow/contrib/cluster_resolver/BUILD index 6b03df2b8e..1a124eca36 100644 --- a/tensorflow/contrib/cluster_resolver/BUILD +++ b/tensorflow/contrib/cluster_resolver/BUILD @@ -110,5 +110,6 @@ tf_py_test( "//tensorflow/python:platform_test", "//tensorflow/python:training", ], + grpc_enabled = True, main = "python/training/tpu_cluster_resolver_test.py", ) diff --git a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py index b04822fa9d..1c480b2513 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py @@ -53,11 +53,16 @@ class ClusterResolver(object): raise NotImplementedError( 'cluster_spec is not implemented for {}.'.format(self)) + @abc.abstractmethod + def master(self): + """...""" + raise NotImplementedError('master is not implemented for {}.'.format(self)) + class SimpleClusterResolver(ClusterResolver): """Simple implementation of ClusterResolver that accepts a ClusterSpec.""" - def __init__(self, cluster_spec): + def 
__init__(self, cluster_spec, master=''): """Creates a SimpleClusterResolver from a ClusterSpec.""" super(SimpleClusterResolver, self).__init__() @@ -65,10 +70,18 @@ class SimpleClusterResolver(ClusterResolver): raise TypeError('cluster_spec must be a ClusterSpec.') self._cluster_spec = cluster_spec + if not isinstance(master, str): + raise TypeError('master must be a string.') + self._master = master + def cluster_spec(self): """Returns the ClusterSpec passed into the constructor.""" return self._cluster_spec + def master(self): + """Returns the master address to use when creating a session.""" + return self._master + class UnionClusterResolver(ClusterResolver): """Performs a union on underlying ClusterResolvers. @@ -87,9 +100,13 @@ class UnionClusterResolver(ClusterResolver): Raises: TypeError: If any argument is not a subclass of `ClusterResolvers`. + ValueError: If there are no arguments passed. """ super(UnionClusterResolver, self).__init__() + if not args: + raise ValueError('At least one ClusterResolver is required.') + for cluster_resolver in args: if not isinstance(cluster_resolver, ClusterResolver): raise TypeError('All arguments must be a sub-class of ' @@ -169,3 +186,7 @@ class UnionClusterResolver(ClusterResolver): merged_cluster[job_name].update(task_dict) return ClusterSpec(merged_cluster) + + def master(self): + """master returns the master address from the first cluster resolver.""" + return self._cluster_resolvers[0].master() diff --git a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py index dbfb77723c..d9c97d53eb 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py @@ -234,5 +234,7 @@ class UnionClusterResolverTest(test.TestCase): self._verifyClusterSpecEquality(cluster_spec, expected_proto) +# TODO(saeta): Include tests 
for master resolution + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py index d6f2eced93..3f58241289 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py @@ -134,3 +134,6 @@ class GceClusterResolver(ClusterResolver): worker_list.sort() return ClusterSpec({self._job_name: worker_list}) + + def master(self): + return '' diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index a6a6e642e4..aeccf4c06b 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -23,7 +23,8 @@ from six.moves.urllib.request import Request from six.moves.urllib.request import urlopen from tensorflow.contrib.cluster_resolver.python.training.cluster_resolver import ClusterResolver -from tensorflow.python.training.server_lib import ClusterSpec +from tensorflow.python.training import server_lib +from tensorflow.python.util import compat _GOOGLE_API_CLIENT_INSTALLED = True try: @@ -46,13 +47,23 @@ class TPUClusterResolver(ClusterResolver): req = Request('http://metadata/computeMetadata/v1/%s' % path, headers={'Metadata-Flavor': 'Google'}) resp = urlopen(req) - return resp.read() + return compat.as_bytes(resp.read()) + + def _shouldResolve(self): + if (self._tpu == compat.as_bytes('') or + self._tpu == compat.as_bytes('local') or + self._tpu.startswith(compat.as_bytes('/bns')) or + self._tpu.startswith(compat.as_bytes('grpc://'))): + return False + return True def __init__(self, - tpu_names, + tpu, zone=None, project=None, - job_name='tpu_worker', + job_name='worker', + 
coordinator_name='coordinator', + coordinator_address=None, credentials='default', service=None): """Creates a new TPUClusterResolver object. @@ -61,7 +72,11 @@ class TPUClusterResolver(ClusterResolver): for the IP addresses and ports of each Cloud TPU listed. Args: - tpu_names: A list of names of the target Cloud TPUs. + tpu: Either a string, or a list of strings corresponding to the TPUs to + use. If the single string is the empty string, the string 'local', or a + string that begins with 'grpc://' or '/bns', then it is assumed to not + correspond with a Cloud TPU and will instead be passed as the session + master and no ClusterSpec propagation will be done. zone: Zone where the TPUs are located. If omitted or empty, we will assume that the zone of the TPU is the same as the zone of the GCE VM, which we will try to discover from the GCE metadata service. @@ -69,6 +84,12 @@ class TPUClusterResolver(ClusterResolver): empty, we will try to discover the project name of the GCE VM from the GCE metadata service. job_name: Name of the TensorFlow job the TPUs belong to. + coordinator_name: The name to use for the coordinator. Set to None if the + coordinator should not be included in the computed ClusterSpec. + coordinator_address: The address of the coordinator (typically an ip:port + pair). If set to None, a TF server will be started. If coordinator_name + is None, a TF server will not be started even if coordinator_address is + None. credentials: GCE Credentials. If None, then we use default credentials from the oauth2client service: The GCE API object returned by the googleapiclient.discovery @@ -77,26 +98,36 @@ class TPUClusterResolver(ClusterResolver): Raises: ImportError: If the googleapiclient is not installed. + ValueError: If no TPUs are specified. 
""" + if isinstance(tpu, list): + if not tpu: + raise ValueError('At least one TPU must be specified.') + if len(tpu) != 1: + raise NotImplementedError( + 'Using multiple TPUs in a single session is not yet implemented') + tpu = tpu[0] + self._tpu = compat.as_bytes(tpu) # self._tpu is always bytes + self._job_name = job_name + self._credentials = credentials - if not project: - project = self._requestComputeMetadata('/project/project-id') + should_resolve = self._shouldResolve() - if not zone: - zone_path = self._requestComputeMetadata('/instance/zone') + if not project and should_resolve: + project = self._requestComputeMetadata('project/project-id') + + if not zone and should_resolve: + zone_path = self._requestComputeMetadata('instance/zone') zone = zone_path.split('/')[-1] self._project = project self._zone = zone - self._tpu_names = tpu_names - self._job_name = job_name - self._credentials = credentials - if credentials == 'default': + if credentials == 'default' and should_resolve: if _GOOGLE_API_CLIENT_INSTALLED: self._credentials = GoogleCredentials.get_application_default() - if service is None: + if service is None and should_resolve: if not _GOOGLE_API_CLIENT_INSTALLED: raise ImportError('googleapiclient must be installed before using the ' 'TPU cluster resolver') @@ -107,25 +138,41 @@ class TPUClusterResolver(ClusterResolver): else: self._service = service - def get_master(self): - """Get the ClusterSpec grpc master path. + self._coordinator_name = coordinator_name + if coordinator_name and not coordinator_address and should_resolve: + self._start_local_server() + else: + self._coordinator_address = coordinator_address + + def master(self): + """Get the Master string to be used for the session. + + In the normal case, this returns the grpc path (grpc://1.2.3.4:8470) of + first instance in the ClusterSpec returned by the cluster_spec function. 
- This returns the grpc path (grpc://1.2.3.4:8470) of first instance in the - ClusterSpec returned by the cluster_spec function. This is suitable for use - for the `master` argument in tf.Session() when you are using one TPU. + If a non-TPU name is used when constructing a TPUClusterResolver, that will + be returned instead (e.g. If the tpus argument's value when constructing + this TPUClusterResolver was 'grpc://10.240.1.2:8470', + 'grpc://10.240.1.2:8470' will be returned). Returns: - string, the grpc path of the first instance in the ClusterSpec. + string, the connection string to use when creating a session. Raises: ValueError: If none of the TPUs specified exists. """ + if not self._shouldResolve(): + return self._tpu + job_tasks = self.cluster_spec().job_tasks(self._job_name) if not job_tasks: raise ValueError('No TPUs exists with the specified names exist.') return 'grpc://' + job_tasks[0] + def get_master(self): + return self.master() + def cluster_spec(self): """Returns a ClusterSpec object based on the latest TPU information. @@ -134,17 +181,54 @@ class TPUClusterResolver(ClusterResolver): Returns: A ClusterSpec containing host information returned from Cloud TPUs. - """ - worker_list = [] - - for tpu_name in self._tpu_names: - full_name = 'projects/%s/locations/%s/nodes/%s' % ( - self._project, self._zone, tpu_name) - request = self._service.projects().locations().nodes().get(name=full_name) - response = request.execute() - if 'health' in response and response['health'] == 'HEALTHY': - instance_url = '%s:%s' % (response['ipAddress'], response['port']) - worker_list.append(instance_url) - - return ClusterSpec({self._job_name: worker_list}) + Raises: + RuntimeError: If the provided TPU is not healthy. 
+ """ + if not self._shouldResolve(): + return server_lib.ClusterSpec({}) + + full_name = 'projects/%s/locations/%s/nodes/%s' % ( + self._project, self._zone, compat.as_text(self._tpu)) + request = self._service.projects().locations().nodes().get(name=full_name) + response = request.execute() + + if 'health' in response and response['health'] != 'HEALTHY': + raise RuntimeError('TPU "%s" is unhealthy: "%s"' % (self._tpu, + response['health'])) + + if 'networkEndpoints' in response: + worker_list = [ + '%s:%s' % (endpoint['ipAddress'], endpoint['port']) + for endpoint in response['networkEndpoints'] + ] + else: + # Fall back to the deprecated response format + instance_url = '%s:%s' % (response['ipAddress'], response['port']) + worker_list = [instance_url] + + cluster_spec = {self._job_name: worker_list} + + if self._coordinator_address: + cluster_spec[self._coordinator_name] = [self._coordinator_address] + + return server_lib.ClusterSpec(cluster_spec) + + def _start_local_server(self): + address = self._requestComputeMetadata('instance/network-interfaces/0/ip') + self._server = server_lib.Server( + { + 'local': ['0.0.0.0:0'] + }, protocol='grpc', config=None, start=True) + # self._server.target is of the form: grpc://ipaddress:port + target = compat.as_bytes(self._server.target) + splits = target.split(compat.as_bytes(':')) + assert len(splits) == 3, self._server.target + assert splits[0] == compat.as_bytes('grpc'), self._server.target + self._coordinator_port = compat.as_text(splits[2]) + self._coordinator_address = '%s:%s' % ( + address, compat.as_text(self._coordinator_port)) + + def __deepcopy__(self, memo): + # TODO(b/73668574): Remove this once RunConfig avoids performing deepcopy. 
+ return self diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index 4fd34629cf..6b4a155152 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -21,7 +21,7 @@ from __future__ import print_function from tensorflow.contrib.cluster_resolver.python.training.tpu_cluster_resolver import TPUClusterResolver from tensorflow.python.platform import test from tensorflow.python.training import server_lib - +from tensorflow.python.util import compat mock = test.mock @@ -50,10 +50,12 @@ class MockNodeClass(object): def mock_request_compute_metadata(cls, *args, **kwargs): del cls, kwargs # Unused. - if args[0] == '/project/project-id': + if args[0] == 'project/project-id': return 'test-project' - elif args[0] == '/instance/zone': + elif args[0] == 'instance/zone': return 'projects/test-project/locations/us-central1-c' + elif args[0] == 'instance/network-interfaces/0/ip': + return '10.128.1.2' return '' @@ -113,17 +115,26 @@ class TPUClusterResolverTest(test.TestCase): tpu_cluster_resolver = TPUClusterResolver( project=None, zone=None, - tpu_names=['test-tpu-1'], + tpu=['test-tpu-1'], credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.1.2.3:8470' } } - """ - self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) + job { + name: 'coordinator' + tasks { key: 0 value: '10.128.1.2:%s' } + } + job { + name: 'worker' + tasks { key: 0 value: '10.1.2.3:8470' } + } + """ % tpu_cluster_resolver._coordinator_port + self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto)) - def testSimpleSuccessfulRetrieval(self): + @mock.patch.object(TPUClusterResolver, 
'_requestComputeMetadata', + mock_request_compute_metadata) + def testRetrieveProjectAndZoneFromMetadataNoCoordinator(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { 'ipAddress': '10.1.2.3', @@ -133,116 +144,217 @@ class TPUClusterResolverTest(test.TestCase): } tpu_cluster_resolver = TPUClusterResolver( - project='test-project', - zone='us-central1-c', - tpu_names=['test-tpu-1'], + project=None, + zone=None, + tpu=['test-tpu-1'], + coordinator_name=None, credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.1.2.3:8470' } } + job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) - def testMultipleSuccessfulRetrieval(self): + def testSimpleSuccessfulRetrieval(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { 'ipAddress': '10.1.2.3', 'port': '8470', 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - 'health': 'HEALTHY' } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1'], + tpu=['test-tpu-1'], + coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.4.5.6:8470' } - tasks { key: 1 value: '10.1.2.3:8470' } } + job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } } + job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) - def testHealthyTpuNodeRetrieval(self): + def testNewNetworkEndpointFormat(self): tpu_map = { 
'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { - 'ipAddress': '10.1.2.3', - 'port': '8470', - 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-3': { - 'ipAddress': '10.7.8.9', - 'port': '8470', - 'health': 'UNHEALTHY' + 'health': 'HEALTHY', + 'networkEndpoints': [{ + 'ipAddress': '10.2.3.4', + 'port': 8470, + }] } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1', 'test-tpu-3'], + tpu='test-tpu-1', + coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { - name: 'tpu_worker' - tasks { - key: 0 - value: '10.1.2.3:8470' - } - } + job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } } + job { name: 'worker' tasks { key: 0 value: '10.2.3.4:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) + self.assertEqual('grpc://10.2.3.4:8470', tpu_cluster_resolver.master()) - def testGetMasterMultipleEntries(self): + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', + mock_request_compute_metadata) + def testPodResolution(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { - 'ipAddress': '10.1.2.3', - 'port': '8470', - 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - 'health': 'HEALTHY' + 'health': + 'HEALTHY', + 'networkEndpoints': [ + { + 'ipAddress': '10.2.3.4', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.5', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.6', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.7', + 'port': 8470, + }, + ] + } + } + + tpu_cluster_resolver = TPUClusterResolver( 
+ tpu='test-tpu-1', + credentials=None, + service=self.mock_service_client(tpu_map=tpu_map)) + + actual_cluster_spec = tpu_cluster_resolver.cluster_spec() + expected_proto = """ + job { + name: 'coordinator', + tasks { key: 0 value: '10.128.1.2:%s'} + } + job { + name: 'worker' + tasks { key: 0 value: '10.2.3.4:8470' } + tasks { key: 1 value: '10.2.3.5:8470' } + tasks { key: 2 value: '10.2.3.6:8470' } + tasks { key: 3 value: '10.2.3.7:8470' } + } + """ % tpu_cluster_resolver._coordinator_port + self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto)) + + def testPodResolutionNoCoordinator(self): + tpu_map = { + 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { + 'health': + 'HEALTHY', + 'networkEndpoints': [ + { + 'ipAddress': '10.2.3.4', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.5', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.6', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.7', + 'port': 8470, + }, + ] } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1'], + tpu='test-tpu-1', + coordinator_name=None, credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) - self.assertEqual('grpc://10.4.5.6:8470', tpu_cluster_resolver.get_master()) + + actual_cluster_spec = tpu_cluster_resolver.cluster_spec() + expected_proto = """ + job { + name: 'worker' + tasks { key: 0 value: '10.2.3.4:8470' } + tasks { key: 1 value: '10.2.3.5:8470' } + tasks { key: 2 value: '10.2.3.6:8470' } + tasks { key: 3 value: '10.2.3.7:8470' } + } + """ + self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) def testGetMasterNoEntries(self): tpu_map = {} + with self.assertRaises(ValueError): + TPUClusterResolver( + project='test-project', + zone='us-central1-c', + tpu=[], + coordinator_name=None, + credentials=None, + service=self.mock_service_client(tpu_map=tpu_map)) + + # TODO(saeta): Convert to parameterized test when included in OSS TF. 
+ def verifyShouldResolve(self, tpu, should_resolve): tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=[], + tpu=tpu, + coordinator_name=None, credentials=None, - service=self.mock_service_client(tpu_map=tpu_map)) - with self.assertRaises(ValueError): - tpu_cluster_resolver.get_master() + service=self.mock_service_client(tpu_map={})) + self.assertEqual(should_resolve, tpu_cluster_resolver._shouldResolve(), + "TPU: '%s'" % tpu) + + def testShouldResolveNoName(self): + self.verifyShouldResolve('', False) + + def testShouldResolveLocal(self): + self.verifyShouldResolve('local', False) + + def testShouldResolveGrpc(self): + self.verifyShouldResolve('grpc://10.1.2.3:8470', False) + + def testShouldResolveBns(self): + self.verifyShouldResolve('/bns/foo/bar', False) + + def testShouldResolveName(self): + self.verifyShouldResolve('mytpu', True) + + def testShouldResolveList(self): + self.verifyShouldResolve(['myothertpu'], True) + + def testShouldResolveGrpcPrefix(self): + self.verifyShouldResolve('grpctpu', True) + + def testNoCallComputeMetadata(self): + tpu_cluster_resolver = TPUClusterResolver(tpu='/bns/foo/bar') + self.assertEqual(compat.as_bytes('/bns/foo/bar'), + tpu_cluster_resolver.master()) + self.assertEqual( + server_lib.ClusterSpec({}), tpu_cluster_resolver.cluster_spec()) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 6440702182..7ceb4069cf 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -26,6 +26,7 @@ import os import numpy as np from tensorflow.contrib.tpu.python.tpu import util as util_lib +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.platform import tf_logging as logging @@ -140,6 +141,7 @@ class RunConfig(run_config_lib.RunConfig): 
tpu_config=None, evaluation_master=None, master=None, + cluster=None, **kwargs): """Constructs a RunConfig. @@ -148,15 +150,26 @@ class RunConfig(run_config_lib.RunConfig): evaluation_master: a string. The address of the master to use for eval. Defaults to master if not set. master: a string. The address of the master to use for training. + cluster: a ClusterResolver **kwargs: keyword config parameters. + + Raises: + ValueError: if cluster is not None and the provided session_config has a + cluster_def already. """ super(RunConfig, self).__init__(**kwargs) self._tpu_config = tpu_config or TPUConfig() + self._cluster = cluster # If user sets master and/or evaluation_master explicilty, including empty # string '', take it. Otherwise, take the values set by parent class. if master is not None: + if cluster is not None: + raise ValueError('Both master and cluster are set.') self._master = master + else: + if cluster: + self._master = cluster.master() if evaluation_master is not None: self._evaluation_master = evaluation_master @@ -170,6 +183,20 @@ class RunConfig(run_config_lib.RunConfig): # evaluation_master to master, unless user overwrites it. self._evaluation_master = self._master + # Set the ClusterSpec to use + if cluster: + self._cluster_spec = cluster.cluster_spec() + + # Merge the cluster_def into the ConfigProto. 
+ if self._session_config is None: # pylint: disable=access-member-before-definition + self._session_config = config_pb2.ConfigProto(allow_soft_placement=True) + if self._session_config.HasField('cluster_def'): + raise ValueError( + 'You cannot provide a ClusterResolver and ' + 'session_config.cluster_def.') + self._session_config.cluster_def.CopyFrom( + self._cluster_spec.as_cluster_def()) + @property def evaluation_master(self): return self._evaluation_master @@ -182,6 +209,10 @@ class RunConfig(run_config_lib.RunConfig): def tpu_config(self): return self._tpu_config + @property + def cluster(self): + return self._cluster + def replace(self, **kwargs): if 'tpu_config' not in kwargs: return super(RunConfig, self).replace(**kwargs) -- GitLab From 3af99b657f23e52d9c291d488fa3bb2a68e90022 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Mon, 26 Feb 2018 10:59:54 -0800 Subject: [PATCH 082/311] Automated g4 rollback of changelist 185324160 PiperOrigin-RevId: 187048135 --- tensorflow/contrib/cmake/tf_core_cpu.cmake | 7 ++ tensorflow/contrib/makefile/Makefile | 1 + .../core/common_runtime/gpu/gpu_id_manager.cc | 50 +++++++-- .../core/common_runtime/gpu/gpu_id_manager.h | 14 ++- tensorflow/core/grappler/clusters/BUILD | 26 ++++- .../core/grappler/clusters/single_machine.cc | 17 ++- tensorflow/core/grappler/clusters/utils.cc | 71 ++++++++----- tensorflow/core/grappler/clusters/utils.h | 3 +- .../core/grappler/clusters/utils_test.cc | 100 ++++++++++++++++++ tensorflow/core/grappler/costs/BUILD | 1 + tensorflow/core/grappler/costs/utils.cc | 18 +++- 11 files changed, 262 insertions(+), 46 deletions(-) create mode 100644 tensorflow/core/grappler/clusters/utils_test.cc diff --git a/tensorflow/contrib/cmake/tf_core_cpu.cmake b/tensorflow/contrib/cmake/tf_core_cpu.cmake index 96ac60d095..a54cbff33b 100644 --- a/tensorflow/contrib/cmake/tf_core_cpu.cmake +++ b/tensorflow/contrib/cmake/tf_core_cpu.cmake @@ -63,6 +63,12 @@ file(GLOB_RECURSE tf_core_cpu_exclude_srcs 
"${tensorflow_source_dir}/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" "${tensorflow_source_dir}/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc" ) +file(GLOB_RECURSE tf_core_cpu_whitelisted_srcs + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id.h" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc" +) +list(REMOVE_ITEM tf_core_cpu_exclude_srcs ${tf_core_cpu_whitelisted_srcs}) list(REMOVE_ITEM tf_core_cpu_srcs ${tf_core_cpu_exclude_srcs}) if (tensorflow_ENABLE_GPU) @@ -79,6 +85,7 @@ if (tensorflow_ENABLE_GPU) "${tensorflow_source_dir}/tensorflow/core/*test*.cc" ) list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_gpu_exclude_srcs}) + list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_cpu_whitelisted_srcs}) list(APPEND tf_core_cpu_srcs ${tf_core_gpu_srcs}) endif() diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 81327407d4..05e8d9064b 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -677,6 +677,7 @@ endif # TEGRA TF_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) # Add in any extra files that don't fit the patterns easily TF_CC_SRCS += tensorflow/contrib/makefile/downloads/fft2d/fftsg.c +TF_CC_SRCS += tensorflow/core/common_runtime/gpu/gpu_id_manager.cc # Also include the op and kernel definitions. TF_CC_SRCS += $(shell cat $(MAKEFILE_DIR)/tf_op_files.txt) PBT_CC_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_pb_text_files.txt) diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc index 207afdca75..7dfff3269c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc @@ -18,7 +18,10 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" namespace tensorflow { @@ -27,8 +30,8 @@ namespace { class TfToCudaGpuIdMap { public: static TfToCudaGpuIdMap* singleton() { - static auto* manager = new TfToCudaGpuIdMap; - return manager; + static auto* id_map = new TfToCudaGpuIdMap; + return id_map; } void InsertOrDie(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id) @@ -47,18 +50,41 @@ class TfToCudaGpuIdMap { } } - int32 FindOrDie(TfGpuId tf_gpu_id) const LOCKS_EXCLUDED(mu_) { + CudaGpuId FindOrDie(TfGpuId tf_gpu_id) const LOCKS_EXCLUDED(mu_) { mutex_lock lock(mu_); + return FindOrDieLocked(tf_gpu_id); + } + + bool Find(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) const + LOCKS_EXCLUDED(mu_) { + mutex_lock lock(mu_); + if (id_map_.count(tf_gpu_id.value()) == 0) return false; + *cuda_gpu_id = FindOrDieLocked(tf_gpu_id); + return true; + } + + private: + TfToCudaGpuIdMap() = default; + + CudaGpuId FindOrDieLocked(TfGpuId tf_gpu_id) const + EXCLUSIVE_LOCKS_REQUIRED(mu_) { auto result = id_map_.find(tf_gpu_id.value()); CHECK(result != id_map_.end()) << "Could not find the mapping for TfGpuId: " << tf_gpu_id; - return result->second; + return CudaGpuId(result->second); + } + + void TestOnlyReset() LOCKS_EXCLUDED(mu_) { + mutex_lock lock(mu_); + id_map_.clear(); } - private: using IdMapType = std::unordered_map; mutable mutex mu_; IdMapType id_map_ GUARDED_BY(mu_); + + friend class ::tensorflow::GpuIdManager; + TF_DISALLOW_COPY_AND_ASSIGN(TfToCudaGpuIdMap); }; } // namespace @@ -67,8 +93,20 @@ void GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, TfToCudaGpuIdMap::singleton()->InsertOrDie(tf_gpu_id, cuda_gpu_id); } +Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) { + if 
(TfToCudaGpuIdMap::singleton()->Find(tf_gpu_id, cuda_gpu_id)) { + return Status::OK(); + } + return errors::NotFound("TF GPU device with id ", tf_gpu_id.value(), + " was not registered"); +} + CudaGpuId GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id) { - return CudaGpuId(TfToCudaGpuIdMap::singleton()->FindOrDie(tf_gpu_id)); + return TfToCudaGpuIdMap::singleton()->FindOrDie(tf_gpu_id); +} + +void GpuIdManager::TestOnlyReset() { + TfToCudaGpuIdMap::singleton()->TestOnlyReset(); } } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h index 33925d8c36..2b54cc184c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h @@ -17,15 +17,25 @@ limitations under the License. #define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_MANAGER_H_ #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/lib/core/status.h" namespace tensorflow { -// Class that manages the translation between Tensorflow GPU ids and CUDA GPU -// ids. +// Class that maintains a map from TfGpuId to CudaGpuId, and manages the +// translation between them. class GpuIdManager { public: + // Adds a mapping from tf_gpu_id to cuda_gpu_id. static void InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id); + + // Gets the cuda_gpu_id associated with tf_gpu_id. Returns OK if found. + static Status TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id); + // Similar to the above version, but returns the result, and checks fail if + // no result is found. static CudaGpuId TfToCudaGpuId(TfGpuId tf_gpu_id); + + // Clears the map. Used in unit tests only. 
+ static void TestOnlyReset(); }; } // namespace tensorflow diff --git a/tensorflow/core/grappler/clusters/BUILD b/tensorflow/core/grappler/clusters/BUILD index b8f8e13c9a..b653f902e8 100644 --- a/tensorflow/core/grappler/clusters/BUILD +++ b/tensorflow/core/grappler/clusters/BUILD @@ -1,7 +1,12 @@ licenses(["notice"]) # Apache 2.0 +load("//tensorflow:tensorflow.bzl", "if_cuda") load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") +load( + "//tensorflow/core:platform/default/build_config_root.bzl", + "tf_cuda_tests_tags", +) filegroup( name = "all_files", @@ -26,13 +31,12 @@ config_setting( tf_cuda_library( name = "utils", srcs = ["utils.cc"], - hdrs = [ - "utils.h", - ], + hdrs = ["utils.h"], visibility = ["//visibility:public"], deps = [ "//third_party/eigen3", "//tensorflow/core:framework", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", ] + select({ @@ -41,6 +45,21 @@ tf_cuda_library( }), ) +tf_cc_test( + name = "utils_test", + srcs = ["utils_test.cc"], + linkstatic = if_cuda(1, 0), + tags = tf_cuda_tests_tags(), + deps = [ + ":utils", + "//tensorflow/core:gpu_id", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + cc_library( name = "cluster", srcs = ["cluster.cc"], @@ -104,6 +123,7 @@ cc_library( "//tensorflow/core:core_cpu_lib", "//tensorflow/core:direct_session", "//tensorflow/core:framework", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core/grappler:utils", "//tensorflow/core/kernels:ops_util", diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index cc7f418d49..8e236c9ee8 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -21,6 +21,8 @@ limitations under the License. 
#include "tensorflow/cc/training/queue_runner.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/grappler/clusters/utils.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/kernels/ops_util.h" @@ -80,13 +82,24 @@ Status SingleMachine::Provision() { std::vector devices; TF_RETURN_IF_ERROR(session_->ListDevices(&devices)); - int gpu_id = 0; for (const auto& dev : devices) { DeviceProperties attr; if (dev.device_type() == "CPU") { attr = GetLocalCPUInfo(); } else if (dev.device_type() == "GPU") { - attr = GetLocalGPUInfo(gpu_id++); + DeviceNameUtils::ParsedName parsed; + if (!DeviceNameUtils::ParseFullName(dev.name(), &parsed)) { + return errors::InvalidArgument( + strings::StrCat("Not able to parse GPU device name: ", dev.name())); + } + TfGpuId tf_gpu_id(parsed.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + return errors::Unavailable("Unknown TF GPU device with id ", + tf_gpu_id.value(), ": ", s.ToString()); + } + attr = GetLocalGPUInfo(cuda_gpu_id); } else if (dev.device_type().find("XLA") == string::npos) { // Filter out the fake XLA devices to avoid double counting the actual // hardware resources that are available. diff --git a/tensorflow/core/grappler/clusters/utils.cc b/tensorflow/core/grappler/clusters/utils.cc index 607e10e1ab..b54b34959a 100644 --- a/tensorflow/core/grappler/clusters/utils.cc +++ b/tensorflow/core/grappler/clusters/utils.cc @@ -27,6 +27,9 @@ limitations under the License. 
#include "include/libxsmm.h" #endif +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/cpu_info.h" @@ -66,36 +69,40 @@ DeviceProperties GetLocalCPUInfo() { return device; } -DeviceProperties GetLocalGPUInfo(int gpu_id) { +DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id) { DeviceProperties device; device.set_type("GPU"); #if GOOGLE_CUDA cudaDeviceProp properties; - cudaError_t error = cudaGetDeviceProperties(&properties, gpu_id); - if (error == cudaSuccess) { - device.set_vendor("NVidia"); - device.set_model(properties.name); - device.set_frequency(properties.clockRate * 1e-3); - device.set_num_cores(properties.multiProcessorCount); - device.set_num_registers(properties.regsPerMultiprocessor); - // For compute capability less than 5, l1 cache size is configurable to - // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For - // compute capability larger or equal to 5, l1 cache (unified with texture - // cache) size is 24 KB. This number may need to be updated for future - // compute capabilities. - device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024); - device.set_l2_cache_size(properties.l2CacheSize); - device.set_l3_cache_size(0); - device.set_shared_memory_size_per_multiprocessor( - properties.sharedMemPerMultiprocessor); - device.set_memory_size(properties.totalGlobalMem); - // 8 is the number of bits per byte. 2 is accounted for - // double data rate (DDR). 
- device.set_bandwidth(properties.memoryBusWidth / 8 * - properties.memoryClockRate * 2); + cudaError_t error = cudaGetDeviceProperties(&properties, cuda_gpu_id.value()); + if (error != cudaSuccess) { + device.set_type("UNKNOWN"); + LOG(ERROR) << "Failed to get device properties, error code: " << error; + return device; } + device.set_vendor("NVIDIA"); + device.set_model(properties.name); + device.set_frequency(properties.clockRate * 1e-3); + device.set_num_cores(properties.multiProcessorCount); + device.set_num_registers(properties.regsPerMultiprocessor); + // For compute capability less than 5, l1 cache size is configurable to + // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For + // compute capability larger or equal to 5, l1 cache (unified with texture + // cache) size is 24 KB. This number may need to be updated for future + // compute capabilities. + device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024); + device.set_l2_cache_size(properties.l2CacheSize); + device.set_l3_cache_size(0); + device.set_shared_memory_size_per_multiprocessor( + properties.sharedMemPerMultiprocessor); + device.set_memory_size(properties.totalGlobalMem); + // 8 is the number of bits per byte. 2 is accounted for + // double data rate (DDR). 
+ device.set_bandwidth(properties.memoryBusWidth / 8 * + properties.memoryClockRate * 2); + (*device.mutable_environment())["architecture"] = strings::StrCat(properties.major, ".", properties.minor); (*device.mutable_environment())["cuda"] = strings::StrCat(CUDA_VERSION); @@ -106,18 +113,26 @@ DeviceProperties GetLocalGPUInfo(int gpu_id) { } DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) { + DeviceProperties unknown; + unknown.set_type("UNKNOWN"); + if (device.type == "CPU") { return GetLocalCPUInfo(); } else if (device.type == "GPU") { if (device.has_id) { - return GetLocalGPUInfo(device.id); + TfGpuId tf_gpu_id(device.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + LOG(ERROR) << s; + return unknown; + } + return GetLocalGPUInfo(cuda_gpu_id); } else { - return GetLocalGPUInfo(0); + return GetLocalGPUInfo(CudaGpuId(0)); } } - DeviceProperties result; - result.set_type("UNKNOWN"); - return result; + return unknown; } } // end namespace grappler diff --git a/tensorflow/core/grappler/clusters/utils.h b/tensorflow/core/grappler/clusters/utils.h index 191942040a..df8e7dca44 100644 --- a/tensorflow/core/grappler/clusters/utils.h +++ b/tensorflow/core/grappler/clusters/utils.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_GRAPPLER_CLUSTERS_UTILS_H_ #define TENSORFLOW_GRAPPLER_CLUSTERS_UTILS_H_ +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" #include "tensorflow/core/protobuf/device_properties.pb.h" #include "tensorflow/core/util/device_name_utils.h" @@ -27,7 +28,7 @@ DeviceProperties GetLocalCPUInfo(); // Returns the DeviceProperties for the specified GPU attached to the server on // which grappler is running. 
-DeviceProperties GetLocalGPUInfo(int gpu_id); +DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id); // Returns the DeviceProperties of the specified device DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device); diff --git a/tensorflow/core/grappler/clusters/utils_test.cc b/tensorflow/core/grappler/clusters/utils_test.cc new file mode 100644 index 0000000000..74218adbac --- /dev/null +++ b/tensorflow/core/grappler/clusters/utils_test.cc @@ -0,0 +1,100 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/clusters/utils.h" + +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/device_properties.pb.h" + +namespace tensorflow { +namespace grappler { +namespace { + +TEST(UtilsTest, GetLocalGPUInfo) { + GpuIdManager::TestOnlyReset(); +#if GOOGLE_CUDA + LOG(INFO) << "CUDA is enabled."; + DeviceProperties properties; + + // Invalid CUDA GPU ID. + properties = GetLocalGPUInfo(CudaGpuId(100)); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Succeed when a valid CUDA GPU id was inserted. 
+ properties = GetLocalGPUInfo(CudaGpuId(0)); + EXPECT_EQ("GPU", properties.type()); + EXPECT_EQ("NVIDIA", properties.vendor()); +#else + LOG(INFO) << "CUDA is not enabled."; + DeviceProperties properties; + + properties = GetLocalGPUInfo(CudaGpuId(0)); + EXPECT_EQ("GPU", properties.type()); + + properties = GetLocalGPUInfo(CudaGpuId(100)); + EXPECT_EQ("GPU", properties.type()); +#endif +} + +TEST(UtilsTest, GetDeviceInfo) { + GpuIdManager::TestOnlyReset(); + DeviceNameUtils::ParsedName device; + DeviceProperties properties; + + // Invalid type. + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Cpu info. + device.type = "CPU"; + properties = GetDeviceInfo(device); + EXPECT_EQ("CPU", properties.type()); + + // No TF GPU id provided. + device.type = "GPU"; + device.has_id = false; + properties = GetDeviceInfo(device); + EXPECT_EQ("GPU", properties.type()); +#if GOOGLE_CUDA + EXPECT_EQ("NVIDIA", properties.vendor()); +#endif + + // TF to CUDA GPU id mapping entry doesn't exist. + device.has_id = true; + device.id = 0; + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + +#if GOOGLE_CUDA + // Invalid CUDA GPU id. + GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(0), CudaGpuId(100)); + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Valid CUDA GPU id. 
+ GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(1), CudaGpuId(0)); + device.id = 1; + properties = GetDeviceInfo(device); + EXPECT_EQ("GPU", properties.type()); + EXPECT_EQ("NVIDIA", properties.vendor()); +#endif +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index 0fe01e9c9e..5336df1f51 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -142,6 +142,7 @@ tf_cuda_library( "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:graph", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index 602f69f12e..076945d5c6 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -26,6 +26,8 @@ limitations under the License. 
#include "cuda/include/cudnn.h" #endif +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/op.h" @@ -200,17 +202,25 @@ std::vector FindInputFeatures( } DeviceProperties GetDeviceInfo(const string& device_str) { + DeviceProperties unknown; + unknown.set_type("UNKNOWN"); + DeviceNameUtils::ParsedName parsed; if (DeviceNameUtils::ParseFullName(device_str, &parsed)) { if (parsed.type == "GPU") { - return GetLocalGPUInfo(parsed.id); + TfGpuId tf_gpu_id(parsed.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + LOG(ERROR) << s; + return unknown; + } + return GetLocalGPUInfo(cuda_gpu_id); } else if (parsed.type == "CPU") { return GetLocalCPUInfo(); } } - DeviceProperties device; - device.set_type("UNKNOWN"); - return device; + return unknown; } DeviceProperties GetDeviceInfo(const CostGraphDef::Node& node) { -- GitLab From 387e0e51a3a8b6c7752bb198bf1fdfa1ebf12b60 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 11:08:54 -0800 Subject: [PATCH 083/311] Bring in `isbuiltin`. 
PiperOrigin-RevId: 187049824 --- tensorflow/python/util/tf_inspect.py | 5 +++++ tensorflow/python/util/tf_inspect_test.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index c2fe6fc449..a7cead5555 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -149,6 +149,11 @@ def getsource(object): # pylint: disable=redefined-builtin return _inspect.getsource(tf_decorator.unwrap(object)[1]) +def isbuiltin(object): # pylint: disable=redefined-builtin + """TFDecorator-aware replacement for inspect.isbuiltin.""" + return _inspect.isbuiltin(tf_decorator.unwrap(object)[1]) + + def isclass(object): # pylint: disable=redefined-builtin """TFDecorator-aware replacement for inspect.isclass.""" return _inspect.isclass(tf_decorator.unwrap(object)[1]) diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py index 8903e1156b..129408449e 100644 --- a/tensorflow/python/util/tf_inspect_test.py +++ b/tensorflow/python/util/tf_inspect_test.py @@ -144,6 +144,19 @@ def test_decorated_function_with_defaults(a, b=2, c='Hello'): self.assertEqual( expected, tf_inspect.getsource(test_decorated_function_with_defaults)) + def testIsBuiltin(self): + self.assertEqual( + tf_inspect.isbuiltin(TestDecoratedClass), + inspect.isbuiltin(TestDecoratedClass)) + self.assertEqual( + tf_inspect.isbuiltin(test_decorated_function), + inspect.isbuiltin(test_decorated_function)) + self.assertEqual( + tf_inspect.isbuiltin(test_undecorated_function), + inspect.isbuiltin(test_undecorated_function)) + self.assertEqual(tf_inspect.isbuiltin(range), inspect.isbuiltin(range)) + self.assertEqual(tf_inspect.isbuiltin(max), inspect.isbuiltin(max)) + def testIsClass(self): self.assertTrue(tf_inspect.isclass(TestDecoratedClass)) self.assertFalse(tf_inspect.isclass(test_decorated_function)) -- GitLab From 2513479d7b39235f9504ede2bf6f61cb78aae923 Mon Sep 
17 00:00:00 2001 From: Asim Shankar Date: Mon, 26 Feb 2018 11:10:20 -0800 Subject: [PATCH 084/311] eager/examples/resnet50: Fix breakage. PiperOrigin-RevId: 187050075 --- .../contrib/eager/python/examples/resnet50/resnet50_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index c106ab0a06..65dcc53aab 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -194,11 +194,11 @@ class ResNet50Benchmarks(tf.test.Benchmark): with tf.device(device): images, _ = random_batch(batch_size) for _ in xrange(num_burn): - model(images).cpu() + model(images, training=False).cpu() gc.collect() start = time.time() for _ in xrange(num_iters): - model(images).cpu() + model(images, training=False).cpu() self._report(label, start, num_iters, device, batch_size, data_format) def benchmark_eager_apply(self): -- GitLab From 5a9343b2ac7011593fb2ad2e7c82119181e608ec Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 11:12:04 -0800 Subject: [PATCH 085/311] Add support for parsing the "gather" HLO PiperOrigin-RevId: 187050345 --- .../compiler/xla/tools/parser/hlo_parser.cc | 37 +++++++++++++++++-- .../xla/tools/parser/hlo_parser_test.cc | 24 ++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index cd2b843ad3..e60a5a4919 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -1049,9 +1049,40 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloInstruction::CreateDot(shape, operands[0], operands[1], dnum)); break; } - case HloOpcode::kGather: - // TODO(b/72710576): HLO parsing is not implemented for Gather. 
- return TokenError("HLO parsing is not implemented for Gather"); + case HloOpcode::kGather: { + optional> output_window_dims; + attrs["output_window_dims"] = { + /*required=*/true, AttrTy::kBracedInt64List, &output_window_dims}; + optional> elided_window_dims; + attrs["elided_window_dims"] = { + /*required=*/true, AttrTy::kBracedInt64List, &elided_window_dims}; + optional> gather_dims_to_operand_dims; + attrs["gather_dims_to_operand_dims"] = {/*required=*/true, + AttrTy::kBracedInt64List, + &gather_dims_to_operand_dims}; + optional index_vector_dim; + attrs["index_vector_dim"] = {/*required=*/true, AttrTy::kInt64, + &index_vector_dim}; + optional> window_bounds; + attrs["window_bounds"] = {/*required=*/true, AttrTy::kBracedInt64List, + &window_bounds}; + + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + + GatherDimensionNumbers dim_numbers = HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/*output_window_dims, + /*elided_window_dims=*/*elided_window_dims, + /*gather_dims_to_operand_dims=*/*gather_dims_to_operand_dims, + /*index_vector_dim=*/*index_vector_dim); + + instruction = builder->AddInstruction(HloInstruction::CreateGather( + shape, /*operand=*/operands[0], /*gather_indices=*/operands[1], + dim_numbers, *window_bounds)); + break; + } case HloOpcode::kTrace: return TokenError(StrCat("parsing not yet implemented for op: ", HloOpcodeString(opcode))); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index b8c6b59204..863081d654 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -716,6 +716,18 @@ ENTRY %sparse_f32_r1 () -> f32[9] { ROOT %foo = f32[9]sparse{10} constant(f32[9]{1: 2, 3: 4, 5: 6}) } +)" +}, +{ +"gather", +R"(HloModule StringifyGather + +ENTRY %Gather (input_tensor: f32[50,49,48,47,46], gather_indices: s64[10,9,8,7,5]) -> 
f32[10,9,8,7,30,29,28,27,26] { + %input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0) + %gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1) + ROOT %gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(f32[50,49,48,47,46]{4,3,2,1,0} %input_tensor, s64[10,9,8,7,5]{4,3,2,1,0} %gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26} +} + )" }, }); @@ -860,6 +872,18 @@ ENTRY dot { ROOT dot = f32[2,3]{1,0} dot(a, b), lhs_batch_dims={0}, lhs_contracting_dims={1}, rhs_contracting_dims={0} } +)" +}, +{ +"gather", +R"(HloModule gather + +ENTRY Gather { + input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0) + gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1) + ROOT gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(input_tensor, gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26} +} + )" }, }); -- GitLab From 4fac98fbc731f742e0121fde561fcf6ed1203423 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Mon, 26 Feb 2018 11:13:09 -0800 Subject: [PATCH 086/311] Added const to Node* in various parts of the code base. 
PiperOrigin-RevId: 187050526 --- tensorflow/compiler/tf2xla/const_analysis.cc | 4 ++-- tensorflow/compiler/tf2xla/graph_compiler.cc | 2 +- .../core/common_runtime/shape_refiner.cc | 4 ++-- .../core/distributed_runtime/scheduler.cc | 18 +++++++++--------- .../core/distributed_runtime/scheduler.h | 6 +++--- tensorflow/core/graph/costmodel.cc | 2 +- tensorflow/core/graph/graph.cc | 2 +- tensorflow/core/graph/graph.h | 2 +- tensorflow/core/graph/graph_constructor.cc | 2 +- tensorflow/core/graph/graph_partition.cc | 6 +++--- tensorflow/core/graph/node_builder.cc | 6 +++--- tensorflow/core/graph/node_builder.h | 6 +++--- tensorflow/core/graph/optimizer_cse.cc | 16 ++++++++-------- 13 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc index 82923722c5..6f46532419 100644 --- a/tensorflow/compiler/tf2xla/const_analysis.cc +++ b/tensorflow/compiler/tf2xla/const_analysis.cc @@ -37,7 +37,7 @@ Status BackwardsConstAnalysis(const Graph& g, }; Status status; - std::unordered_set must_be_const; + std::unordered_set must_be_const; auto visit = [&status, &metadata_ops, &must_be_const, compile_time_const_args](Node* node) { if (!status.ok()) return; @@ -55,7 +55,7 @@ Status BackwardsConstAnalysis(const Graph& g, compile_time_const_args->at(index) = true; return; } - for (Node* pred : node->in_nodes()) { + for (const Node* pred : node->in_nodes()) { must_be_const.insert(pred); } return; diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc index 058a1f2621..b20c1ffc7d 100644 --- a/tensorflow/compiler/tf2xla/graph_compiler.cc +++ b/tensorflow/compiler/tf2xla/graph_compiler.cc @@ -130,7 +130,7 @@ Status GraphCompiler::Compile() { // Set up inputs from outputs of previous nodes. 
for (auto* e : n->in_edges()) { if (e->IsControlEdge()) continue; - Node* src = e->src(); + const Node* src = e->src(); TF_RET_CHECK(src->id() < output_registry.size()); const NodeOutputs& src_outputs = output_registry[src->id()]; diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 45cdab98e0..2acaa31d32 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -211,14 +211,14 @@ Status ShapeRefiner::AddNode(const Node* node) { // For each 'input' of this node, fetch the corresponding shape // from 'input's InferenceContext, and store into a vector // indexed by 'node's input. - std::vector input_nodes(node->num_inputs()); + std::vector input_nodes(node->num_inputs()); std::vector input_shapes(node->num_inputs()); std::vector>> input_handle_shapes_and_types(node->num_inputs()); for (const Edge* e : node->in_edges()) { if (e->IsControlEdge()) continue; - Node* input = e->src(); + const Node* input = e->src(); auto it = node_to_context_.find(input); if (it == node_to_context_.end()) { return errors::FailedPrecondition( diff --git a/tensorflow/core/distributed_runtime/scheduler.cc b/tensorflow/core/distributed_runtime/scheduler.cc index 9dae5b3b92..8403636197 100644 --- a/tensorflow/core/distributed_runtime/scheduler.cc +++ b/tensorflow/core/distributed_runtime/scheduler.cc @@ -80,7 +80,7 @@ Microseconds SlackAnalysis::ComputeAsap(std::vector* asap_times) { std::vector pending_count(graph_->num_node_ids()); InitializePending(graph_, &pending_count); - std::deque queue; + std::deque queue; Node* srcNode = graph_->source_node(); queue.push_back(srcNode); (*asap_times)[srcNode->id()] = 0; @@ -92,7 +92,7 @@ Microseconds SlackAnalysis::ComputeAsap(std::vector* asap_times) { for (const Edge* out_edge : curr->out_edges()) { // The time needed for 'out' to get its input from 'curr'. 
Microseconds copy_time(0); - Node* out = out_edge->dst(); + const Node* out = out_edge->dst(); if (!out_edge->IsControlEdge() && curr->assigned_device_name() != out->assigned_device_name()) { // Add an arbitrary 10microsecs for each copy. @@ -137,7 +137,7 @@ Microseconds SlackAnalysis::ComputeAlap(std::vector* alap_times) { } } - std::deque queue; + std::deque queue; Node* sinkNode = graph_->sink_node(); queue.push_back(sinkNode); (*alap_times)[sinkNode->id()] = 0; @@ -148,7 +148,7 @@ Microseconds SlackAnalysis::ComputeAlap(std::vector* alap_times) { for (const Edge* in_edge : curr->in_edges()) { // The time needed for 'curr' to get its input from 'src'. Microseconds copy_time(0); - Node* src = in_edge->src(); + const Node* src = in_edge->src(); if (!in_edge->IsControlEdge() && src->assigned_device_name() != curr->assigned_device_name()) { // TODO(yuanbyu): Use the real cost model @@ -236,7 +236,7 @@ Microseconds GreedyScheduler::ComputeSchedule( for (const Edge* out_edge : event.node->out_edges()) { Microseconds copy_time(0); - Node* out = out_edge->dst(); + const Node* out = out_edge->dst(); if (!out_edge->IsControlEdge() && event.node->assigned_device_name() != out->assigned_device_name()) { // TODO(yuanbyu): Use below with the real cost model. 
@@ -277,11 +277,11 @@ Microseconds GreedyScheduler::ComputeSchedule( return max_completion; } -Node* GreedyScheduler::GetNodeWithHighestPriority( - const std::vector& nodes) { - Node* curr_node = nullptr; +const Node* GreedyScheduler::GetNodeWithHighestPriority( + const std::vector& nodes) { + const Node* curr_node = nullptr; int64 curr_priority = kint64max; - for (Node* n : nodes) { + for (const Node* n : nodes) { if ((*priority_)[n->id()] < curr_priority) { curr_node = n; curr_priority = (*priority_)[n->id()]; diff --git a/tensorflow/core/distributed_runtime/scheduler.h b/tensorflow/core/distributed_runtime/scheduler.h index ef87b9834d..bf9d0d1bec 100644 --- a/tensorflow/core/distributed_runtime/scheduler.h +++ b/tensorflow/core/distributed_runtime/scheduler.h @@ -57,11 +57,11 @@ class GreedyScheduler { struct Sim { int degree_parallelism; int num_running; - std::vector ready_nodes; + std::vector ready_nodes; }; struct Event { - Node* node; + const Node* node; Microseconds time; bool is_completion; @@ -79,7 +79,7 @@ class GreedyScheduler { private: // Returns the ready node with the highest priority for a sim. 
- Node* GetNodeWithHighestPriority(const std::vector& nodes); + const Node* GetNodeWithHighestPriority(const std::vector& nodes); const DeviceSet* devices_; const CostModel* cost_model_; diff --git a/tensorflow/core/graph/costmodel.cc b/tensorflow/core/graph/costmodel.cc index 4f3a6ec38c..1df45d9b89 100644 --- a/tensorflow/core/graph/costmodel.cc +++ b/tensorflow/core/graph/costmodel.cc @@ -427,7 +427,7 @@ static void AssignSizes(const Graph& g, CostModel* cost_model) { if (e->IsControlEdge()) { continue; } - Node* src = e->src(); + const Node* src = e->src(); // TODO(josh11b): Get an estimate from the Op Bytes size(1); diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 9b56216f1f..a7af5e2312 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -339,7 +339,7 @@ Node* Graph::AddNode(const NodeDef& node_def, Status* status) { return node; } -Node* Graph::CopyNode(Node* node) { +Node* Graph::CopyNode(const Node* node) { DCHECK(!node->IsSource()); DCHECK(!node->IsSink()); Node* copy = AllocateNode(node->props_, node); diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 9d96cd4654..cbd58b051a 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -422,7 +422,7 @@ class Graph { // Copies *node, which may belong to another graph, to a new node, // which is returned. Does not copy any edges. *this owns the // returned instance. - Node* CopyNode(Node* node); + Node* CopyNode(const Node* node); // Removes a node from this graph, including all edges from or to it. // *node should not be accessed after calling this function. 
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 0629ff32d0..627309078a 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -1271,7 +1271,7 @@ void CopyGraph(const Graph& src, Graph* dest) { dest->set_versions(src.versions()); // Copy the nodes - std::unordered_map + std::unordered_map node_map; // "Node in src" -> "Node in *dest" node_map[src.source_node()] = dest->source_node(); node_map[src.sink_node()] = dest->sink_node(); diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc index add80eda23..17a174101b 100644 --- a/tensorflow/core/graph/graph_partition.cc +++ b/tensorflow/core/graph/graph_partition.cc @@ -123,8 +123,8 @@ bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { return false; } - Node* src = edge->src(); - Node* dst = edge->dst(); + const Node* src = edge->src(); + const Node* dst = edge->dst(); if (src->assigned_device_name() == dst->assigned_device_name()) { int src_port = edge->src_output(); int dst_port = edge->dst_input(); @@ -141,7 +141,7 @@ bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { // Return true iff (dst, dst_input) is specified on host memory. 
bool IsDstInputOnHost(const Edge* edge, const GraphInfo& info) { - Node* dst = edge->dst(); + const Node* dst = edge->dst(); int dst_port = edge->dst_input(); if (info.device_types[dst->id()] != DEVICE_CPU) { if (edge->IsControlEdge()) return false; diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc index 138952dcb3..114962c0e4 100644 --- a/tensorflow/core/graph/node_builder.cc +++ b/tensorflow/core/graph/node_builder.cc @@ -88,7 +88,7 @@ NodeBuilder& NodeBuilder::ControlInput(Node* src_node) { NodeBuilder& NodeBuilder::ControlInputs(gtl::ArraySlice src_nodes) { control_inputs_.insert(control_inputs_.end(), src_nodes.begin(), src_nodes.end()); - for (Node* src_node : src_nodes) { + for (const Node* src_node : src_nodes) { def_builder_.ControlInput(src_node->name()); } return *this; @@ -127,7 +127,7 @@ Status NodeBuilder::Finalize(Graph* graph, Node** created_node) const { return Status::OK(); } -void NodeBuilder::AddIndexError(Node* node, int i) { +void NodeBuilder::AddIndexError(const Node* node, int i) { if (node == nullptr) { errors_.emplace_back( strings::StrCat("Attempt to add nullptr Node to node with type ", @@ -140,7 +140,7 @@ void NodeBuilder::AddIndexError(Node* node, int i) { } } -bool NodeBuilder::GetOutputType(Node* node, int i, DataType* dt) { +bool NodeBuilder::GetOutputType(const Node* node, int i, DataType* dt) { bool error; *dt = SafeGetOutput(node, i, &error); if (error) AddIndexError(node, i); diff --git a/tensorflow/core/graph/node_builder.h b/tensorflow/core/graph/node_builder.h index 86647a49c1..f6b7b5674b 100644 --- a/tensorflow/core/graph/node_builder.h +++ b/tensorflow/core/graph/node_builder.h @@ -120,7 +120,7 @@ class NodeBuilder { const OpDef& op_def() const { return def_builder_.op_def(); } private: - static DataType SafeGetOutput(Node* node, int i, bool* error) { + static DataType SafeGetOutput(const Node* node, int i, bool* error) { if (node != nullptr && i >= 0 && i < node->num_outputs()) { 
*error = false; return node->output_type(i); @@ -131,11 +131,11 @@ class NodeBuilder { } // If SafeGetOutput indicates a range error, add it to errors_. - void AddIndexError(Node* node, int i); + void AddIndexError(const Node* node, int i); // Set *dt and returns true if i is in range. Combines // SafeGetOutput() and AddIndexError(). - bool GetOutputType(Node* node, int i, DataType* dt); + bool GetOutputType(const Node* node, int i, DataType* dt); NodeDefBuilder def_builder_; std::vector inputs_; diff --git a/tensorflow/core/graph/optimizer_cse.cc b/tensorflow/core/graph/optimizer_cse.cc index 6b452a1d5d..4073255db3 100644 --- a/tensorflow/core/graph/optimizer_cse.cc +++ b/tensorflow/core/graph/optimizer_cse.cc @@ -65,8 +65,8 @@ class OptimizerCSE { }; static void FillInputs(const Node* n, - gtl::InlinedVector* control_edges, - gtl::InlinedVector, 4>* in) { + gtl::InlinedVector* control_edges, + gtl::InlinedVector, 4>* in) { DCHECK_EQ(in->size(), n->num_inputs()); control_edges->clear(); for (const Edge* e : n->in_edges()) { @@ -96,8 +96,8 @@ size_t OptimizerCSE::NodeHash(const Node* n) { const int N_in = n->num_inputs(); strings::StrAppend(&str_to_hash, N_in); - gtl::InlinedVector control_edges; - gtl::InlinedVector, 4> in(N_in); + gtl::InlinedVector control_edges; + gtl::InlinedVector, 4> in(N_in); FillInputs(n, &control_edges, &in); for (const auto& edge : in) { strings::StrAppend(&str_to_hash, edge.first->id(), edge.second); @@ -147,10 +147,10 @@ bool OptimizerCSE::Equivalent(const Node* a, const Node* b, // Compare input sources if (a->num_inputs() != b->num_inputs()) return false; const int N_in = a->num_inputs(); - gtl::InlinedVector a_control_edges; - gtl::InlinedVector b_control_edges; - gtl::InlinedVector, 4> a_in(N_in); - gtl::InlinedVector, 4> b_in(N_in); + gtl::InlinedVector a_control_edges; + gtl::InlinedVector b_control_edges; + gtl::InlinedVector, 4> a_in(N_in); + gtl::InlinedVector, 4> b_in(N_in); FillInputs(a, &a_control_edges, &a_in); 
FillInputs(b, &b_control_edges, &b_in); if (a_in != b_in) return false; -- GitLab From 2d5db0213258da2e97276af7e6e9d85e9a1e2100 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Mon, 26 Feb 2018 11:22:43 -0800 Subject: [PATCH 087/311] TFLite: Ensures pointers to tensors won't be invalidated unless 16+ tensors are added. PiperOrigin-RevId: 187052100 --- tensorflow/contrib/lite/interpreter.cc | 13 +++---- tensorflow/contrib/lite/interpreter.h | 20 +++++++++++ tensorflow/contrib/lite/interpreter_test.cc | 40 +++++++++++++++++++++ 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 370e495527..0f5e17f0de 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -27,13 +27,6 @@ limitations under the License. #include "tensorflow/contrib/lite/nnapi_delegate.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" -namespace { - -// std::vector preallocation tuning. -constexpr const int kSlotsToReserve = 128; - -} // namespace - namespace tflite { // A trivial implementation of GraphInfo around the Interpreter. @@ -85,8 +78,8 @@ Interpreter::Interpreter(ErrorReporter* error_reporter) context_.GetExecutionPlan = nullptr; // Reserve some space for the tensors to avoid excessive resizing. 
- tensors_.reserve(kSlotsToReserve); - nodes_and_registration_.reserve(kSlotsToReserve); + tensors_.reserve(kTensorsReservedCapacity); + nodes_and_registration_.reserve(kTensorsReservedCapacity); next_execution_plan_index_to_prepare_ = 0; UseNNAPI(false); } @@ -353,6 +346,7 @@ TfLiteStatus Interpreter::PrepareOpsStartingAt( TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + EnsureTensorsVectorCapacity(); if (OpPrepare(registration, &node) == kTfLiteError) { return kTfLiteError; } @@ -430,6 +424,7 @@ TfLiteStatus Interpreter::Invoke() { TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + EnsureTensorsVectorCapacity(); if (OpInvoke(registration, &node) == kTfLiteError) { status = kTfLiteError; } diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index a9df2627e0..04c19644a0 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -265,6 +265,14 @@ class Interpreter { void set_model(const Model* model) { model_ = const_cast(model); } Model* model() const { return model_; } + // The default capacity of `tensors_` vector. + static constexpr int kTensorsReservedCapacity = 128; + // The capacity headroom of `tensors_` vector before calling ops' + // `prepare` and `invoke` function. In these functions, it's guaranteed + // allocating up to `kTensorsCapacityHeadroom` more tensors won't invalidate + // pointers to existing tensors. + static constexpr int kTensorsCapacityHeadroom = 16; + private: // Give 'op_reg' a chance to initialize itself using the contents of // 'buffer'. @@ -377,6 +385,18 @@ class Interpreter { static TfLiteStatus GetExecutionPlan(struct TfLiteContext* context, TfLiteIntArray** execution_plan); + // Ensures that `tensors_` has at least `kTensorsCapacityHeadroom` extra + // capacity. 
Calling this function may invalidate existing pointers to + // tensors. After calling this function, adding `kTensorsCapacityHeadroom` + // more tensors won't invalidate the pointer to existing tensors. + void EnsureTensorsVectorCapacity() { + const int required_capacity = tensors_size() + kTensorsCapacityHeadroom; + if (required_capacity > tensors_.capacity()) { + tensors_.reserve(required_capacity); + context_.tensors = tensors_.data(); + } + } + // A pure C data structure used to communicate with the pure C plugin // interface. To avoid copying tensor metadata, this is also the definitive // structure to store tensors. diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 28c96e5dde..2e6727b323 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -561,6 +561,46 @@ TEST(BasicInterpreter, TestCustomErrorReporter) { ASSERT_EQ(reporter.calls, 1); } +TEST(InterpreterTensorsCapacityTest, TestWithinHeadroom) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity), + kTfLiteOk); + TfLiteRegistration registration = {nullptr, nullptr, nullptr, nullptr}; + registration.prepare = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* first_tensor = context->tensors; + + int new_tensor_index; + context->AddTensors(context, Interpreter::kTensorsCapacityHeadroom, + &new_tensor_index); + EXPECT_EQ(first_tensor, context->tensors); + return kTfLiteOk; + }; + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + &registration), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); +} + +TEST(InterpreterTensorsCapacityTest, TestExceedHeadroom) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity), + kTfLiteOk); + TfLiteRegistration registration = {nullptr, nullptr, nullptr, nullptr}; + registration.prepare = [](TfLiteContext* context, 
TfLiteNode* node) { + TfLiteTensor* first_tensor = context->tensors; + + int new_tensor_index; + context->AddTensors(context, Interpreter::kTensorsCapacityHeadroom + 1, + &new_tensor_index); + EXPECT_NE(first_tensor, context->tensors); + return kTfLiteOk; + }; + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + &registration), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); +} + // Test fixture that allows playing with execution plans. It creates a two // node graph that can be executed in either [0,1] order or [1,0] order. // The CopyOp records when it is invoked in the class member run_order_ -- GitLab From 215af206b0cba3ac3d64fe01ec372c924662f97f Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 11:43:14 -0800 Subject: [PATCH 088/311] Actually expose smart_cond and smart_constant_value in tf.contrib.framework Also moves these methods into their own file in python/framework. This avoids further bloating control_flow_ops.py and makes the BUILD deps easier for a future change I'm working on. 
PiperOrigin-RevId: 187055501 --- tensorflow/contrib/framework/BUILD | 1 + tensorflow/contrib/framework/__init__.py | 7 +- tensorflow/python/BUILD | 26 ++++++ tensorflow/python/framework/smart_cond.py | 79 +++++++++++++++++++ .../python/framework/smart_cond_test.py | 66 ++++++++++++++++ tensorflow/python/layers/utils.py | 5 +- tensorflow/python/ops/control_flow_ops.py | 56 ------------- .../python/ops/control_flow_ops_test.py | 36 --------- 8 files changed, 180 insertions(+), 96 deletions(-) create mode 100644 tensorflow/python/framework/smart_cond.py create mode 100644 tensorflow/python/framework/smart_cond_test.py diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index 1accb319d2..50868c6d6c 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -63,6 +63,7 @@ tf_custom_op_py_library( "//tensorflow/python:platform", "//tensorflow/python:pywrap_tensorflow", "//tensorflow/python:script_ops", + "//tensorflow/python:smart_cond", "//tensorflow/python:sparse_tensor", "//tensorflow/python:state_ops", "//tensorflow/python:state_ops_gen", diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index deeb5bec79..8063250091 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -87,6 +87,9 @@ See the @{$python/contrib.framework} guide. 
@@get_placeholders +@@smart_cond +@@smart_constant_value + @@CriticalSection @@BoundedTensorSpec @@ -104,10 +107,10 @@ from tensorflow.contrib.framework.python.ops import * from tensorflow.python.framework.ops import prepend_name_scope from tensorflow.python.framework.ops import strip_name_scope +from tensorflow.python.framework.smart_cond import smart_cond +from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec -from tensorflow.python.ops.control_flow_ops import smart_cond -from tensorflow.python.ops.control_flow_ops import smart_constant_value from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['nest'] diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4c8c73548c..b0cb48c80c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -765,6 +765,31 @@ py_library( ], ) +py_library( + name = "smart_cond", + srcs = ["framework/smart_cond.py"], + srcs_version = "PY2AND3", + deps = [ + ":control_flow_ops", + ":tensor_util", + ], +) + +py_test( + name = "smart_cond_test", + size = "small", + srcs = ["framework/smart_cond_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":client_testlib", + ":constant_op", + ":framework_ops", + ":math_ops", + ":session", + ":smart_cond", + ], +) + py_library( name = "sparse_tensor", srcs = ["framework/sparse_tensor.py"], @@ -4091,6 +4116,7 @@ py_library( ":control_flow_ops", ":framework_for_generated_wrappers", ":platform", + ":smart_cond", ":tensor_util", ":util", ":variable_scope", diff --git a/tensorflow/python/framework/smart_cond.py b/tensorflow/python/framework/smart_cond.py new file mode 100644 index 0000000000..f97bb01f54 --- /dev/null +++ b/tensorflow/python/framework/smart_cond.py @@ -0,0 +1,79 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""smart_cond and related utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import control_flow_ops + + +def smart_cond(pred, true_fn=None, false_fn=None, name=None): + """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. + + If `pred` is a bool or has a constant value, we return either `true_fn()` + or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. + + Arguments: + pred: A scalar determining whether to return the result of `true_fn` or + `false_fn`. + true_fn: The callable to be performed if pred is true. + false_fn: The callable to be performed if pred is false. + name: Optional name prefix when using `tf.cond`. + + Returns: + Tensors returned by the call to either `true_fn` or `false_fn`. + + Raises: + TypeError: If `true_fn` or `false_fn` is not callable. 
+ """ + if not callable(true_fn): + raise TypeError("`true_fn` must be callable.") + if not callable(false_fn): + raise TypeError("`false_fn` must be callable.") + + pred_value = smart_constant_value(pred) + if pred_value is not None: + if pred_value: + return true_fn() + else: + return false_fn() + else: + return control_flow_ops.cond(pred, true_fn=true_fn, false_fn=false_fn, + name=name) + + +def smart_constant_value(pred): + """Return the bool value for `pred`, or None if `pred` had a dynamic value. + + Arguments: + pred: A scalar, either a Python bool or tensor. + + Returns: + True or False if `pred` has a constant boolean value, None otherwise. + + Raises: + TypeError: If `pred` is not a Tensor or bool. + """ + if isinstance(pred, bool): + pred_value = pred + elif isinstance(pred, ops.Tensor): + pred_value = tensor_util.constant_value(pred) + else: + raise TypeError("`pred` must be a Tensor or a Python bool.") + return pred_value diff --git a/tensorflow/python/framework/smart_cond_test.py b/tensorflow/python/framework/smart_cond_test.py new file mode 100644 index 0000000000..b682506da0 --- /dev/null +++ b/tensorflow/python/framework/smart_cond_test.py @@ -0,0 +1,66 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.client import session +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond +from tensorflow.python.framework import test_util +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import googletest + + +@test_util.with_c_api +class SmartCondTest(test_util.TensorFlowTestCase): + + def testSmartCondTrue(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(2) + y = constant_op.constant(5) + z = smart_cond.smart_cond(True, lambda: math_ops.multiply(x, 16), + lambda: math_ops.multiply(y, 5)) + self.assertEqual(z.eval(), 32) + + def testSmartCondFalse(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(4) + y = constant_op.constant(3) + z = smart_cond.smart_cond(False, lambda: math_ops.multiply(x, 16), + lambda: math_ops.multiply(y, 3)) + self.assertEqual(z.eval(), 9) + + def testSmartCondMissingArg1(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(1) + with self.assertRaises(TypeError): + smart_cond.smart_cond(True, false_fn=lambda: x) + + def testSmartCondMissingArg2(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(1) + with self.assertRaises(TypeError): + smart_cond.smart_cond(True, lambda: x) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/layers/utils.py b/tensorflow/python/layers/utils.py index 484c6fc466..3b156c36a2 100644 --- a/tensorflow/python/layers/utils.py +++ b/tensorflow/python/layers/utils.py @@ -24,6 +24,7 @@ from tensorflow.python.eager import context from tensorflow.python.ops import variables from 
tensorflow.python.ops import control_flow_ops from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond as smart_module from tensorflow.python.framework import tensor_util from tensorflow.python.util import nest @@ -201,7 +202,7 @@ def smart_cond(pred, true_fn=None, false_fn=None, name=None): if isinstance(pred, variables.Variable): return control_flow_ops.cond( pred, true_fn=true_fn, false_fn=false_fn, name=name) - return control_flow_ops.smart_cond( + return smart_module.smart_cond( pred, true_fn=true_fn, false_fn=false_fn, name=name) @@ -228,7 +229,7 @@ def constant_value(pred): if isinstance(pred, variables.Variable): return None - return control_flow_ops.smart_constant_value(pred) + return smart_module.smart_constant_value(pred) def object_list_uid(object_list): diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 8218e60b53..152578c0c6 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -23,7 +23,6 @@ See the @{$python/control_flow_ops} guide. @@no_op @@count_up_to @@cond -@@smart_cond @@case @@while_loop @@logical_and @@ -2128,61 +2127,6 @@ def cond(pred, # pylint: enable=redefined-outer-name -def smart_cond(pred, true_fn=None, false_fn=None, name=None): - """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. - - If `pred` is a bool or has a constant value, we return either `true_fn()` - or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. - - Arguments: - pred: A scalar determining whether to return the result of `true_fn` or - `false_fn`. - true_fn: The callable to be performed if pred is true. - false_fn: The callable to be performed if pred is false. - name: Optional name prefix when using `tf.cond`. - - Returns: - Tensors returned by the call to either `true_fn` or `false_fn`. - - Raises: - TypeError: If `true_fn` or `false_fn` is not callable. 
- """ - if not callable(true_fn): - raise TypeError("`true_fn` must be callable.") - if not callable(false_fn): - raise TypeError("`false_fn` must be callable.") - - pred_value = smart_constant_value(pred) - if pred_value is not None: - if pred_value: - return true_fn() - else: - return false_fn() - else: - return cond(pred, true_fn=true_fn, false_fn=false_fn, name=name) - - -def smart_constant_value(pred): - """Return the bool value for `pred`, or None if `pred` had a dynamic value. - - Arguments: - pred: A scalar, either a Python bool or tensor. - - Returns: - True or False if `pred` has a constant boolean value, None otherwise. - - Raises: - TypeError: If `pred` is not a Tensor or bool. - """ - if isinstance(pred, bool): - pred_value = pred - elif isinstance(pred, ops.Tensor): - pred_value = tensor_util.constant_value(pred) - else: - raise TypeError("`pred` must be a Tensor or a Python bool.") - return pred_value - - def _resource_safe_shape(t): """Returns the shape of t or the variable it points to.""" if t.dtype == dtypes.resource: diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py index adc8c51e11..f22f3059d1 100644 --- a/tensorflow/python/ops/control_flow_ops_test.py +++ b/tensorflow/python/ops/control_flow_ops_test.py @@ -349,42 +349,6 @@ class SwitchTestCase(test_util.TensorFlowTestCase): self.assertEquals(grad_x_false.eval(), 0.) 
-@test_util.with_c_api -class SmartCondTest(test_util.TensorFlowTestCase): - - def testSmartCondTrue(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(2) - y = constant_op.constant(5) - z = control_flow_ops.smart_cond(True, lambda: math_ops.multiply(x, 16), - lambda: math_ops.multiply(y, 5)) - self.assertEqual(z.eval(), 32) - - def testSmartCondFalse(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(4) - y = constant_op.constant(3) - z = control_flow_ops.smart_cond(False, lambda: math_ops.multiply(x, 16), - lambda: math_ops.multiply(y, 3)) - self.assertEqual(z.eval(), 9) - - def testSmartCondMissingArg1(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(1) - with self.assertRaises(TypeError): - control_flow_ops.smart_cond(True, false_fn=lambda: x) - - def testSmartCondMissingArg2(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(1) - with self.assertRaises(TypeError): - control_flow_ops.smart_cond(True, lambda: x) - - @test_util.with_c_api class CondTest(test_util.TensorFlowTestCase): -- GitLab From 8525e1dbdcab467e545f09ecf60f0be11b48cd28 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 11:50:49 -0800 Subject: [PATCH 089/311] Add the internal module name prefix to the white list. PiperOrigin-RevId: 187056701 --- tensorflow/contrib/py2tf/impl/config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/impl/config.py b/tensorflow/contrib/py2tf/impl/config.py index c90e85c96b..bdbc6663dd 100644 --- a/tensorflow/contrib/py2tf/impl/config.py +++ b/tensorflow/contrib/py2tf/impl/config.py @@ -31,12 +31,16 @@ PYTHON_LITERALS = { DEFAULT_UNCOMPILED_MODULES = set(( ('tensorflow',), (utils.__name__,), + + # All of tensorflow's subpackages. Unlike the root tf module, they don't + # have well-known names. 
Not refering to the module directly to avoid + # circular imports. + (utils.__name__[:-len('.contrib.py2tf.utils')],), )) NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',)) # TODO(mdan): Also allow controlling the generated names (for testability). -# TODO(mdan): Make sure copybara renames the reference below. COMPILED_IMPORT_STATEMENTS = ( 'from __future__ import print_function', 'import tensorflow as tf', -- GitLab From 5caeb37e5d4314b702cf660db35b93a3bfc29819 Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 26 Feb 2018 11:52:26 -0800 Subject: [PATCH 090/311] Internal change. PiperOrigin-RevId: 187056963 --- tensorflow/tools/api/tests/api_compatibility_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index c1e09cc531..2a784973e1 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -165,7 +165,7 @@ class ApiCompatibilityTest(test.TestCase): logging.error('%d differences found between API and golden.', diff_count) messages = verbose_diffs if verbose else diffs for i in range(diff_count): - logging.error('Issue %d\t: %s', i + 1, messages[i]) + print('Issue %d\t: %s' % (i + 1, messages[i]), file=sys.stderr) if update_goldens: # Write files if requested. -- GitLab From 0898ee302cb20d9fce50dae4f484816a2dc2d0e2 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Feb 2018 11:57:30 -0800 Subject: [PATCH 091/311] Use optimized ops to handle GPU memory swapping: this avoids the need for 2 pairs of extra _send/_recv nodes which speeds things up a bit. This also ensures that performance doesn't depend on the recv scheduling built in TF, which isn't always optimal. 
PiperOrigin-RevId: 187057831 --- tensorflow/core/grappler/optimizers/BUILD | 36 +++++++- .../optimizers/gpu_swapping_kernels.cc | 88 +++++++++++++++++++ .../grappler/optimizers/gpu_swapping_ops.cc | 58 ++++++++++++ .../grappler/optimizers/memory_optimizer.cc | 9 +- .../optimizers/memory_optimizer_test.cc | 65 +++++++++++--- tensorflow/core/grappler/utils/BUILD | 1 + .../core/grappler/utils/grappler_test.cc | 17 ++++ .../core/grappler/utils/grappler_test.h | 3 + 8 files changed, 258 insertions(+), 19 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc create mode 100644 tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 50ba48ea7a..908e58bcc7 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -1,6 +1,8 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") +load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") +load("//tensorflow:tensorflow.bzl", "tf_kernel_library") filegroup( name = "all_files", @@ -282,18 +284,48 @@ tf_cc_test( ], ) +tf_kernel_library( + name = "gpu_swapping_kernels", + srcs = [ + "gpu_swapping_kernels.cc", + ], + deps = [ + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "gpu_swapping_ops", + srcs = [ + "gpu_swapping_ops.cc", + ], + deps = [ + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], + alwayslink = 1, +) + cc_library( name = "memory_optimizer", - srcs = ["memory_optimizer.cc"], + srcs = [ + "memory_optimizer.cc", + ], hdrs = [ "memory_optimizer.h", ], visibility = ["//visibility:public"], deps = [ + ":gpu_swapping_kernels", + ":gpu_swapping_ops", ":graph_optimizer", ":graph_rewriter", ":static_schedule", "//tensorflow/core:framework", + "//tensorflow/core:lib", 
"//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", "//tensorflow/core/grappler:grappler_item", @@ -307,7 +339,7 @@ cc_library( ], ) -tf_cc_test( +tf_cc_test_gpu( name = "memory_optimizer_test", srcs = ["memory_optimizer_test.cc"], deps = [ diff --git a/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc b/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc new file mode 100644 index 0000000000..1820af6844 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc @@ -0,0 +1,88 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Op kernels used to swap data in and out of GPU memory. 
+ +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace { + +class CopyFromGpuToHostKernel : public AsyncOpKernel { + public: + explicit CopyFromGpuToHostKernel(OpKernelConstruction* context) + : AsyncOpKernel(context) {} + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override { + const Tensor& input = ctx->input(0); + OP_REQUIRES_ASYNC( + ctx, !ctx->input_alloc_attr(0).on_host(), + errors::Internal("The input tensor to the _CopyFromGpuToHost kernel " + "must reside on the device."), + done); + + AllocatorAttributes alloc_attrs; + alloc_attrs.set_gpu_compatible(true); + alloc_attrs.set_on_host(true); + Tensor* output; + OP_REQUIRES_OK_ASYNC( + ctx, ctx->allocate_output(0, input.shape(), &output, alloc_attrs), + done); + + ctx->op_device_context()->CopyDeviceTensorToCPU( + &input, "CopyFromGpuToHost", static_cast(ctx->device()), + output, [ctx, done](const Status& s) { + ctx->SetStatus(s); + done(); + }); + } +}; + +REGISTER_KERNEL_BUILDER( + Name("_CopyFromGpuToHost").Device(DEVICE_GPU).HostMemory("output"), + CopyFromGpuToHostKernel); + +class CopyFromHostToGpuKernel : public AsyncOpKernel { + public: + explicit CopyFromHostToGpuKernel(OpKernelConstruction* context) + : AsyncOpKernel(context) {} + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override { + const Tensor& input = ctx->input(0); + OP_REQUIRES_ASYNC( + ctx, ctx->input_alloc_attr(0).on_host(), + errors::Internal("The input tensor to the _CopyFromHostToGpu kernel " + "must reside on the host."), + done); + + Tensor* output; + OP_REQUIRES_OK_ASYNC(ctx, ctx->allocate_output(0, input.shape(), &output), + done); + + ctx->op_device_context()->CopyCPUTensorToDevice( + &input, static_cast(ctx->device()), output, + [ctx, done](const Status& s) { + ctx->SetStatus(s); + done(); + }); + } +}; + +REGISTER_KERNEL_BUILDER( + 
Name("_CopyFromHostToGpu").Device(DEVICE_GPU).HostMemory("input"), + CopyFromHostToGpuKernel); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc b/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc new file mode 100644 index 0000000000..46828346da --- /dev/null +++ b/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Definition for the ops used to swap data in and out of GPU memory. + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace { + +// The _CopyFromGpuToHost op copies its input tensor to the host. The input must +// reside on GPU. The op itself must be placed on GPU. +REGISTER_OP("_CopyFromGpuToHost") + .Input("input: T") + .Output("output: T") + .Attr("T: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr) { + c->set_output_handle_shapes_and_types(0, *handle_data); + } + return Status::OK(); + }) + .Doc("Copies the input tensor from gpu to the host."); + +// The _CopyFromHostToGpu op copies its input tensor from the host to the GPU. 
+// The input must reside on CPU. The op itself must be placed on GPU. +REGISTER_OP("_CopyFromHostToGpu") + .Input("input: T") + .Output("output: T") + .Attr("T: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr) { + c->set_output_handle_shapes_and_types(0, *handle_data); + } + return Status::OK(); + }) + .Doc("Copies the input tensor from the host to the GPU."); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index dec4f04a1c..694139fa50 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -720,18 +720,19 @@ Status BuildSwapPair(NodeDef* node, int input_to_swap, // Force the tensor to be copied to cpu. NodeDef* swap_out_node = graph->add_node(); swap_out_node->set_name(swap_out_name); - swap_out_node->set_op("Identity"); - swap_out_node->set_device("/device:CPU:0"); + swap_out_node->set_op("_CopyFromGpuToHost"); // Force the tensor to be restored to the device. NodeDef* swap_in_node = graph->add_node(); swap_in_node->set_name(swap_in_name); - swap_in_node->set_op("Identity"); + swap_in_node->set_op("_CopyFromHostToGpu"); *swap_in_node->add_input() = swap_out_node->name(); - // Colocate the swap_in_ node with the node itself. + // Colocate the swap_out_ and swap_in_ nodes with the node itself. 
+ swap_out_node->set_device(node->device()); swap_in_node->set_device(node->device()); string coloc_group = strings::StrCat("loc@", tensor_to_swap); + (*swap_out_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); (*swap_in_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); (*node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc index 5d7913e0c0..9595936e9e 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc @@ -221,16 +221,20 @@ TEST_F(MemoryOptimizerTest, SimpleSwapping) { // Build a simple graph with an op that's marked for swapping. tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output a = ops::Variable(s.WithOpName("a"), {10, 10}, DT_FLOAT); - Output b = ops::AddN(s.WithOpName("b"), {a}); - Output c = ops::AddN(s.WithOpName("c"), {b}); - Output d = ops::AddN(s.WithOpName("d"), {c}); - Output e = ops::AddN(s.WithOpName("e"), {b, d}); + Output a = + ops::Variable(s.WithOpName("a").WithDevice("/gpu:0"), {10, 10}, DT_FLOAT); + Output b = ops::AddN(s.WithOpName("b").WithDevice("/gpu:0"), {a}); + Output c = ops::AddN(s.WithOpName("c").WithDevice("/gpu:0"), {b}); + Output d = ops::AddN(s.WithOpName("d").WithDevice("/gpu:0"), {c}); + Output e = ops::AddN(s.WithOpName("e").WithDevice("/gpu:0"), {b, d}); + + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {10, 10}); + Output init = ops::Assign(s.WithOpName("init"), a, constant); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - EXPECT_EQ(5, item.graph.node_size()); + EXPECT_EQ(7, item.graph.node_size()); EXPECT_EQ(NodeName(e.name()), item.graph.node(4).name()); AttrValue& val = (*item.graph.mutable_node(4)->mutable_attr())["_swap_to_host"]; @@ -243,32 +247,43 @@ TEST_F(MemoryOptimizerTest, SimpleSwapping) { Status 
status = optimizer.Optimize(cluster.get(), item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(7, output.node_size()); - const NodeDef& new_e = output.node(4); + EXPECT_EQ(9, output.node_size()); + const NodeDef& new_e = output.node(6); EXPECT_EQ(NodeName(e.name()), new_e.name()); EXPECT_EQ(2, new_e.input_size()); EXPECT_EQ(NodeName(d.name()), new_e.input(1)); EXPECT_EQ("swap_in_e_0", new_e.input(0)); - const NodeDef& swap_out = output.node(5); + const NodeDef& swap_out = output.node(7); EXPECT_EQ("swap_out_e_0", swap_out.name()); + EXPECT_EQ("_CopyFromGpuToHost", swap_out.op()); - const NodeDef& swap_in = output.node(6); + const NodeDef& swap_in = output.node(8); EXPECT_EQ("swap_in_e_0", swap_in.name()); + EXPECT_EQ("_CopyFromHostToGpu", swap_in.op()); EXPECT_EQ(NodeName(b.name()), swap_out.input(0)); EXPECT_EQ(NodeName(swap_out.name()), swap_in.input(0)); EXPECT_EQ("^c", swap_in.input(1)); - const NodeDef& new_c = output.node(2); + const NodeDef& new_c = output.node(4); EXPECT_EQ(NodeName(c.name()), new_c.name()); EXPECT_EQ("^swap_out_e_0", new_c.input(1)); // Run the optimizer a second time to ensure it's idempotent. 
- item.graph.Swap(&output); - status = optimizer.Optimize(cluster.get(), item, &output); + GrapplerItem item_copy(item, std::move(output)); + status = optimizer.Optimize(cluster.get(), item_copy, &output); TF_EXPECT_OK(status); + +#if GOOGLE_CUDA + item.fetch = {"e"}; + item.init_ops = {init.name()}; + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +#endif } TEST_F(MemoryOptimizerTest, SwappingHeuristics) { @@ -287,9 +302,13 @@ TEST_F(MemoryOptimizerTest, SwappingHeuristics) { Output h = ops::Exp(s.WithOpName("h").WithDevice("/gpu:0"), c); Output i = ops::Log(s.WithOpName("i").WithDevice("/gpu:0"), d); + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {128, 128, 8}); + Output init = ops::Assign(s.WithOpName("init"), v, constant); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"e", "f", "g", "h", "i"}; + item.init_ops = {init.name()}; std::unique_ptr cluster(CreateVirtualCluster()); @@ -308,6 +327,15 @@ TEST_F(MemoryOptimizerTest, SwappingHeuristics) { EXPECT_EQ("axis", node.input(4)); } } + +#if GOOGLE_CUDA + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + for (int i = 0; i < item.fetch.size(); ++i) { + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); + } +#endif } TEST_F(MemoryOptimizerTest, UnswappableInputs) { @@ -325,9 +353,13 @@ TEST_F(MemoryOptimizerTest, UnswappableInputs) { Output e = ops::Concat(s.WithOpName("e").WithDevice("/gpu:0"), {b, c, d}, axis); + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {128, 128, 8}); + Output init = ops::Assign(s.WithOpName("init"), v, constant); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"e"}; + item.init_ops = {init.name()}; std::unique_ptr 
cluster(CreateVirtualCluster()); @@ -344,6 +376,13 @@ TEST_F(MemoryOptimizerTest, UnswappableInputs) { EXPECT_EQ("^swap_out_d_2", node.input(4)); } } + +#if GOOGLE_CUDA + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +#endif } TEST_F(MemoryOptimizerTest, AccumulationRewrites) { diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index 0a9dbe22cf..5d32609434 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -142,6 +142,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", + "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", ], ) diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index fed46c05fb..fef8e97b6e 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -35,6 +35,23 @@ std::vector GrapplerTest::EvaluateNodes( return output_tensors; } +std::vector GrapplerTest::EvaluateFetchNodes(const GrapplerItem& item) { + SessionOptions options; + std::unique_ptr session(NewSession(options)); + TF_CHECK_OK(session->Create(item.graph)); + RunOptions run_options; + if (!item.init_ops.empty()) { + std::vector dummy; + TF_CHECK_OK( + session->Run(run_options, {}, {}, item.init_ops, &dummy, nullptr)); + } + std::vector output_tensors; + TF_CHECK_OK( + session->Run(run_options, {}, item.fetch, {}, &output_tensors, nullptr)); + TF_CHECK_OK(session->Close()); + return output_tensors; +} + void GrapplerTest::AddNode(const string& name, const string& op, const std::vector& inputs, GraphDef* graph) { auto* node = graph->add_node(); diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index 
042b616aa4..fd6809b6e2 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -30,6 +31,8 @@ class GrapplerTest : public ::testing::Test { std::vector EvaluateNodes(const GraphDef& graph, const std::vector& node_names); + std::vector EvaluateFetchNodes(const GrapplerItem& item); + void AddNode(const string& name, const string& op, const std::vector& inputs, GraphDef* graph); -- GitLab From 33a447a3df13559d746b86e2446ee9174099cd3b Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 12:10:01 -0800 Subject: [PATCH 092/311] Fix bug calling gradients_function inside custom_gradient PiperOrigin-RevId: 187059871 --- tensorflow/python/eager/backprop_test.py | 13 +++++++++++++ tensorflow/python/eager/custom_gradient.py | 9 ++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 734558dee2..48fd170764 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -115,6 +115,19 @@ class BackpropTest(test.TestCase): with self.assertRaises(RuntimeError): backprop.gradients_function(f)(constant_op.constant(1.0)) + def testGradientsFunctionInCustomGradient(self): + + @custom_gradient.custom_gradient + def f(x): + (y,) = backprop.gradients_function(lambda x: x * x)(x) + + def grad(dy): + return [2 * dy] + + return y, grad + + self.assertAllEqual(f(1.0), 2.0) + def testImplicitGradOverEmbeddingLookup(self): batch_size = 8 embedding_size = 512 diff --git a/tensorflow/python/eager/custom_gradient.py b/tensorflow/python/eager/custom_gradient.py index 05460ff996..fb932a9372 100644 --- 
a/tensorflow/python/eager/custom_gradient.py +++ b/tensorflow/python/eager/custom_gradient.py @@ -71,11 +71,10 @@ def custom_gradient(f): input_tensors = [tf_ops.convert_to_tensor(x) for x in args] - with tape.stop_recording(): - result, grad_fn = f(*args, **kwargs) - flat_result = nest.flatten(result) - # TODO(apassos) consider removing the identity below. - flat_result = [gen_array_ops.identity(x) for x in flat_result] + result, grad_fn = f(*args, **kwargs) + flat_result = nest.flatten(result) + # TODO(apassos) consider removing the identity below. + flat_result = [gen_array_ops.identity(x) for x in flat_result] def actual_grad_fn(*outputs): return nest.flatten(grad_fn(*outputs)) -- GitLab From cfb6e1628cf752f6cb1d844b8bba3a2cfc98b1e3 Mon Sep 17 00:00:00 2001 From: Jeremy Lau Date: Mon, 26 Feb 2018 12:23:36 -0800 Subject: [PATCH 093/311] Internal change. PiperOrigin-RevId: 187061863 --- tensorflow/contrib/bayesflow/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 08b29fb6bc..270c309ec3 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -210,7 +210,7 @@ cuda_py_test( cuda_py_test( name = "hmc_test", - size = "medium", + size = "large", srcs = ["python/kernel_tests/hmc_test.py"], additional_deps = [ ":bayesflow_py", -- GitLab From 509e51bc809032bd3d9443bd4afc152fb5eaaf93 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 12:33:17 -0800 Subject: [PATCH 094/311] Maintain a cache of output dtypes of ops in TFE_Context. 
PiperOrigin-RevId: 187062992 --- tensorflow/c/eager/c_api.cc | 20 ++++++++++++++++++++ tensorflow/c/eager/runtime.cc | 15 ++++++++++++--- tensorflow/c/eager/runtime.h | 6 ++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index c27a7129fa..bebb63c746 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/rendezvous.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" @@ -823,6 +824,25 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, delete kernel; return; } + // Update output_dtypes inside `kernel`. + const tensorflow::OpDef* op_def = nullptr; + const tensorflow::FunctionDef* function_def = + ctx->func_lib_def.Find(ndef.op()); + if (function_def != nullptr) { + op_def = &(function_def->signature()); + } + if (op_def == nullptr) { + status->status = OpDefForOp(ndef.op().c_str(), &op_def); + if (!status->status.ok()) { + return; + } + } + tensorflow::DataTypeVector input_dtypes; + status->status = InOutTypesForNode(ndef, *op_def, &input_dtypes, + kernel->output_dtypes()); + if (!status->status.ok()) { + return; + } tensorflow::mutex_lock ml(ctx->cache_mu); tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel); } diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index f77a937f1f..4bf24fec2c 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -41,17 +41,26 @@ const uint32 kIsList = 1U << 31; } // namespace +Status OpDefForOp(const char* op_name, const OpDef** op_def) { + const OpRegistrationData* op_reg_data = nullptr; + 
Status s = OpRegistry::Global()->LookUp(op_name, &op_reg_data); + if (s.ok()) { + *op_def = &op_reg_data->op_def; + } + return s; +} + Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out) { mutex_lock l(g_op_name_to_attr_type_map_lock); *out = gtl::FindPtrOrNull(*OpNameToAttrTypeMap(), op_name); if (*out != nullptr) return Status::OK(); - const OpRegistrationData* op_reg_data = nullptr; - Status s = OpRegistry::Global()->LookUp(op_name, &op_reg_data); + const OpDef* op_def = nullptr; + Status s = OpDefForOp(op_name, &op_def); if (!s.ok()) return s; std::unique_ptr m(new AttrTypeMap); // TODO(agarwal): Avoid having to create this "registry" at runtime, // perhaps can be done at op registration time? - for (const auto& attr : op_reg_data->op_def.attr()) { + for (const auto& attr : op_def->attr()) { string type = attr.type(); const bool is_list = (type.length() > 6 && type.compare(0, 4, "list") == 0); if (is_list) { diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 4d20b5244a..7fede4dae9 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -39,6 +39,9 @@ namespace tensorflow { // represent the TF_AttrType type of the values in the list. typedef std::unordered_map AttrTypeMap; +// Look up OpDef for `op_name`. +Status OpDefForOp(const char* op_name, const OpDef** op_def); + // Returns the AttrTypeMap for the TensorFlow operation named op_name. 
Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out); @@ -180,12 +183,15 @@ class KernelAndDevice { const OpKernel* kernel() const { return kernel_.get(); } + DataTypeVector* output_dtypes() { return &output_dtypes_; } + private: std::unique_ptr kernel_; Device* device_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; + DataTypeVector output_dtypes_; }; } // namespace tensorflow -- GitLab From 19c601b53a8444a26fc6694a2766897df37fc336 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Mon, 26 Feb 2018 13:06:59 -0800 Subject: [PATCH 095/311] Include c_api_experimental in libtensorflow.so's dependencies. PiperOrigin-RevId: 187068103 --- tensorflow/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index dc995d231d..3828ee0ddb 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -787,6 +787,7 @@ tf_cc_shared_object( }), deps = [ "//tensorflow/c:c_api", + "//tensorflow/c:c_api_experimental", "//tensorflow/c:exported_symbols.lds", "//tensorflow/c:version_script.lds", "//tensorflow/c/eager:c_api", -- GitLab From 6c99456856973d7cfee31aeeabef8d79014a097f Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Mon, 26 Feb 2018 13:54:02 -0800 Subject: [PATCH 096/311] Update eager uniform replay buffer microbenchmarks to compare against graph functions when possible. 
PiperOrigin-RevId: 187075418 --- .../contrib/framework/python/ops/critical_section_ops.py | 6 ++++-- tensorflow/python/framework/ops.py | 9 ++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py index 3c5c55ed65..ab603cc18e 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py @@ -143,7 +143,7 @@ class CriticalSection(object): def _init_from_args(self, name, shared_name): # pylint: disable=invalid-name """Initialize the CriticalSection from constructor arguments.""" with ops.name_scope(name, "CriticalSection", []) as name: - with ops.control_dependencies(None): + with ops.init_scope(): # pylint: disable=protected-access container = ops.get_default_graph()._container # pylint: enable=protected-access @@ -226,7 +226,9 @@ class CriticalSection(object): # mode. This is generally ok; since eager mode (as of # writing) executes sequentially anyway. for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS): - if sg.handle.name == self._handle.name: + sg_handle_name = ops.convert_to_tensor(sg.handle).name + self_handle_name = ops.convert_to_tensor(self._handle).name + if sg_handle_name == self_handle_name: # Other executions in the same critical section are allowed. 
continue if not (exclusive_resource_access or sg.exclusive_resource_access): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 5a14ea4176..b0d2704c07 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -4805,7 +4805,14 @@ def container(container_name): @tf_export("colocate_with") def colocate_with(op, ignore_existing=False): if context.in_graph_mode(): - return get_default_graph().colocate_with(op, ignore_existing) + default_graph = get_default_graph() + if isinstance(op, EagerTensor): + if default_graph.building_function: + op = internal_convert_to_tensor(op) + else: + raise ValueError("Encountered an Eager-defined Tensor during graph " + "construction, but a function was not being built.") + return default_graph.colocate_with(op, ignore_existing) else: if op is not None: return device(op.device) -- GitLab From 01b96c59f410b44a6279627529a643b1e4da4aa5 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 26 Feb 2018 14:00:07 -0800 Subject: [PATCH 097/311] TFTS: Switch to using core feature columns This fixes some shape issues that came up when using the tf.contrib.layers parsing functions. Adds a string -> embedding column API example to the LSTM example. 
PiperOrigin-RevId: 187076400 --- .../examples/data/multivariate_periods.csv | 200 +++++++++--------- .../timeseries/examples/known_anomaly.py | 8 +- .../contrib/timeseries/examples/lstm.py | 26 ++- .../python/timeseries/estimators.py | 53 +++-- .../timeseries/python/timeseries/model.py | 38 ++-- .../state_space_models/state_space_model.py | 10 +- 6 files changed, 177 insertions(+), 158 deletions(-) diff --git a/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv b/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv index b49a0662c2..9b15b4f0b2 100644 --- a/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv +++ b/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv @@ -1,100 +1,100 @@ -0,0.926906299771,1.99107237682,2.56546245685,3.07914768197,4.04839057867,1.,0. -1,0.108010001864,1.41645361423,2.1686839775,2.94963962176,4.1263503303,1.,0. -2,-0.800567600028,1.0172132907,1.96434754116,2.99885333086,4.04300485864,1.,0. -3,0.0607042871898,0.719540073421,1.9765012584,2.89265588817,4.0951014426,1.,0. -4,0.933712200629,0.28052120776,1.41018552514,2.69232603996,4.06481164223,1.,0. -5,-0.171730652974,0.260054421028,1.48770816369,2.62199129293,4.44572807842,1.,0. -6,-1.00180162933,0.333045158863,1.50006392277,2.88888309683,4.24755865606,1.,0. -7,0.0580061875336,0.688929398826,1.56543458772,2.99840358953,4.52726873347,1.,0. -8,0.764139447412,1.24704875327,1.77649279698,3.13578593851,4.63238922951,1.,0. -9,-0.230331874785,1.47903998963,2.03547545751,3.20624030377,4.77980005228,1.,0. -10,-1.03846045211,2.01133000781,2.31977503972,3.67951536251,5.09716775897,1.,0. -11,0.188643592253,2.23285349038,2.68338482249,3.49817168611,5.24928239634,1.,0. -12,0.91207302309,2.24244446841,2.71362604985,3.96332587625,5.37802271594,1.,0. -13,-0.296588665881,2.02594634141,3.07733910479,3.99698324956,5.56365901394,1.,0. -14,-0.959961476551,1.45078629833,3.18996420137,4.3763059609,5.65356015609,1.,0. 
-15,0.46313530679,1.01141441548,3.4980215948,4.20224896882,5.88842247449,1.,0. -16,0.929354125798,0.626635305936,3.70508262244,4.51791573544,5.73945973251,1.,0. -17,-0.519110731957,0.269249223148,3.39866823332,4.46802003061,5.82768174382,1.,0. -18,-0.924330981367,0.349602834684,3.21762413294,4.72803587499,5.94918925767,1.,0. -19,0.253239387885,0.345158023497,3.11071425333,4.79311566935,5.9489259713,1.,0. -20,0.637408390225,0.698996675371,3.25232492145,4.73814732384,5.9612010251,1.,0. -21,-0.407396859412,1.17456342803,2.49526823723,4.59323415742,5.82501686811,1.,0. -22,-0.967485452118,1.66655933642,2.47284606244,4.58316034754,5.88721406681,1.,0. -23,0.474480867904,1.95018556323,2.0228950072,4.48651142819,5.8255943735,1.,0. -24,1.04309652155,2.23519892356,1.91924131572,4.19094661783,5.87457348436,1.,0. -25,-0.517861513772,2.12501967336,1.70266619979,4.05280882887,5.72160912899,1.,0. -26,-0.945301585146,1.65464653549,1.81567174251,3.92309850635,5.58270493814,1.,0. -27,0.501153868974,1.40600764889,1.53991387719,3.72853247942,5.60169001727,1.,0. -28,0.972859524418,1.00344321868,1.5175642828,3.64092376655,5.10567722582,1.,0. -29,-0.70553406135,0.465306263885,1.7038540803,3.33236870312,5.09182481555,1.,0. -30,-0.946093634916,0.294539309453,1.88052827037,2.93011492669,4.97354922696,1.,0. -31,0.47922123231,0.308465865031,2.03445883031,2.90772899045,4.86241793548,1.,0. -32,0.754030014252,0.549752241167,2.46115815089,2.95063349534,4.71834614627,1.,0. -33,-0.64875949826,0.894615488148,2.5922463381,2.81269864022,4.43480095104,1.,0. -34,-0.757829951086,1.39123914261,2.69258079904,2.61834837315,4.36580046156,1.,0. -35,0.565653301088,1.72360022693,2.97794913834,2.80403840334,4.27327248459,1.,0. -36,0.867440092372,2.21100730052,3.38648090792,2.84057515729,4.12210169576,1.,0. -37,-0.894567758095,2.17549105818,3.45532493329,2.90446025717,4.00251740584,1.,0. -38,-0.715442356893,2.15105389965,3.52041791902,3.03650393392,4.12809249577,1.,0. 
-39,0.80671703672,1.81504564517,3.60463324866,3.00747789871,3.98440762467,1.,0. -40,0.527014790142,1.31803513865,3.43842186337,3.3332594663,4.03232406566,1.,0. -41,-0.795936862129,0.847809114454,3.09875133548,3.52863155938,3.94883924909,1.,0. -42,-0.610245806946,0.425530441018,2.92581949152,3.77238736123,4.27287245021,1.,0. -43,0.611662279431,0.178432049837,2.48128214822,3.73212087883,4.17319013831,1.,0. -44,0.650866553108,0.220341648392,2.41694642022,4.2609098519,4.27271645905,1.,0. -45,-0.774156982023,0.632667602331,2.05474356052,4.32889204886,4.18029723271,1.,0. -46,-0.714058448409,0.924562377599,1.75706135146,4.52492718422,4.3972678094,1.,0. -47,0.889627293379,1.46207968841,1.78299357672,4.64466731095,4.56317887554,1.,0. -48,0.520140662861,1.8996333843,1.41377633823,4.48899091177,4.78805049769,1.,0. -49,-1.03816935616,2.08997002059,1.51218375351,4.84167764204,4.93026048606,1.,0. -50,-0.40772951362,2.30878972136,1.44144415128,4.76854460997,5.01538444629,1.,0. -51,0.792730684781,1.91367048509,1.58887384677,4.71739397335,5.25690012199,1.,0. -52,0.371311881576,1.67565079528,1.81688563053,4.60353107555,5.44265822961,1.,0. -53,-0.814398070371,1.13374634126,1.80328814859,4.72264252878,5.52674761122,1.,0. -54,-0.469017949323,0.601244136627,2.29690896736,4.49859178859,5.54126153454,1.,0. -55,0.871044371426,0.407597593794,2.7499112487,4.19060637761,5.57693767301,1.,0. -56,0.523764933017,0.247705192709,3.09002071379,4.02095509006,5.80510362182,1.,0. -57,-0.881326403531,0.31513103164,3.11358205718,3.96079100808,5.81000652365,1.,0. -58,-0.357928025339,0.486163915865,3.17884556771,3.72634990659,5.85693642011,1.,0. -59,0.853038779822,1.04218094475,3.45835384454,3.36703969978,5.9585988449,1.,0. -60,0.435311516013,1.59715085283,3.63313338588,3.11276729421,5.93643818229,1.,0. -61,-1.02703719138,1.92205832542,3.47606111735,3.06247155999,6.02106646259,1.,0. -62,-0.246661325557,2.14653802542,3.29446326567,2.89936259181,5.67531541272,1.,0. 
-63,1.02554736569,2.25943737733,3.07031591528,2.78176218013,5.78206328989,1.,0. -64,0.337814475969,2.07589147224,2.80356226089,2.55888206331,5.7094075496,1.,0. -65,-1.12023369929,1.25333011618,2.56497288445,2.77361359194,5.50799418376,1.,0. -66,-0.178980246554,1.11937139901,2.51598681313,2.91438309151,5.47469577206,1.,0. -67,0.97550951531,0.60553823137,2.11657741073,2.88081098981,5.37034999502,1.,0. -68,0.136653357206,0.365828836075,1.97386033165,3.13217903204,5.07254490219,1.,0. -69,-1.05607596951,0.153152115069,1.52110743825,3.01308794192,5.08902539125,1.,0. -70,-0.13095280331,0.337113974483,1.52703079853,3.16687131599,4.86649398514,1.,0. -71,1.07081057754,0.714247566736,1.53761382634,3.45151989484,4.75892309166,1.,0. -72,0.0153410376082,1.24631231847,1.61690939161,3.85481994498,4.35683752832,1.,0. -73,-0.912801257303,1.60791309476,1.8729264524,4.03037260012,4.36072588913,1.,0. -74,-0.0894895640338,2.02535207407,1.93484909619,4.09557485132,4.35327025188,1.,0. -75,0.978646999652,2.20085086625,2.09003440427,4.27542353033,4.1805058388,1.,0. -76,-0.113312642876,2.2444100761,2.50789248839,4.4151861502,4.03267168136,1.,0. -77,-1.00215099149,1.84305628445,2.61691237246,4.45425147595,3.81203553766,1.,0. -78,-0.0183234614205,1.49573923116,2.99308471214,4.71134960112,4.0273804959,1.,0. -79,1.0823738177,1.12211589848,3.27079386925,4.94288270502,4.01851068083,1.,0. -80,0.124370187893,0.616474412808,3.4284236674,4.76942168327,3.9749536483,1.,0. -81,-0.929423379352,0.290977090976,3.34131726136,4.78590392707,4.10190661656,1.,0. -82,0.23766302648,0.155302052254,3.49779513794,4.64605656795,4.15571321107,1.,0. -83,1.03531486192,0.359702776204,3.4880725919,4.48167586667,4.21134561991,1.,0. -84,-0.261234571382,0.713877760378,3.42756426614,4.426443869,4.25208300527,1.,0. -85,-1.03572442277,1.25001113691,2.96908341113,4.25500915322,4.25723010649,1.,0. -86,0.380034261243,1.70543355622,2.73605932518,4.16703432307,4.63700400788,1.,0. 
-87,1.03734873488,1.97544410562,2.55586572141,3.84976673263,4.55282864289,1.,0. -88,-0.177344253372,2.22614526325,2.09565864891,3.77378097953,4.82577400298,1.,0. -89,-0.976821526892,2.18385079177,1.78522284118,3.67768223554,5.06302440873,1.,0. -90,0.264820472091,1.86981946157,1.50048403865,3.43619796921,5.05651761669,1.,0. -91,1.05642344868,1.47568646076,1.51347671977,3.20898518885,5.50149047462,1.,0. -92,-0.311607433358,1.04226467636,1.52089650905,3.02291865417,5.4889046232,1.,0. -93,-0.724285777937,0.553052311957,1.48573560173,2.7365973598,5.72549174225,1.,0. -94,0.519859192905,0.226520626591,1.61543723167,2.84102086852,5.69330622288,1.,0. -95,1.0323195039,0.260873217055,1.81913034804,2.83951143848,5.90325028086,1.,0. -96,-0.53285682538,0.387695521405,1.70935609313,2.57977050631,5.79579213161,1.,0. -97,-0.975127997215,0.920948771589,2.51292643636,2.71004616612,5.87016469227,1.,0. -98,0.540246804099,1.36445470181,2.61949412896,2.98482553485,6.02447664937,1.,0. -99,0.987764008058,1.85581989607,2.84685706149,2.94760204892,6.0212151724,1.,0. 
+0,0.926906299771,1.99107237682,2.56546245685,3.07914768197,4.04839057867,1.,0.,strkeya +1,0.108010001864,1.41645361423,2.1686839775,2.94963962176,4.1263503303,1.,0.,strkeyb +2,-0.800567600028,1.0172132907,1.96434754116,2.99885333086,4.04300485864,1.,0.,strkey +3,0.0607042871898,0.719540073421,1.9765012584,2.89265588817,4.0951014426,1.,0.,strkey +4,0.933712200629,0.28052120776,1.41018552514,2.69232603996,4.06481164223,1.,0.,strkey +5,-0.171730652974,0.260054421028,1.48770816369,2.62199129293,4.44572807842,1.,0.,strkey +6,-1.00180162933,0.333045158863,1.50006392277,2.88888309683,4.24755865606,1.,0.,strkey +7,0.0580061875336,0.688929398826,1.56543458772,2.99840358953,4.52726873347,1.,0.,strkey +8,0.764139447412,1.24704875327,1.77649279698,3.13578593851,4.63238922951,1.,0.,strkey +9,-0.230331874785,1.47903998963,2.03547545751,3.20624030377,4.77980005228,1.,0.,strkey +10,-1.03846045211,2.01133000781,2.31977503972,3.67951536251,5.09716775897,1.,0.,strkeyc +11,0.188643592253,2.23285349038,2.68338482249,3.49817168611,5.24928239634,1.,0.,strkey +12,0.91207302309,2.24244446841,2.71362604985,3.96332587625,5.37802271594,1.,0.,strkey +13,-0.296588665881,2.02594634141,3.07733910479,3.99698324956,5.56365901394,1.,0.,strkey +14,-0.959961476551,1.45078629833,3.18996420137,4.3763059609,5.65356015609,1.,0.,strkey +15,0.46313530679,1.01141441548,3.4980215948,4.20224896882,5.88842247449,1.,0.,strkey +16,0.929354125798,0.626635305936,3.70508262244,4.51791573544,5.73945973251,1.,0.,strkey +17,-0.519110731957,0.269249223148,3.39866823332,4.46802003061,5.82768174382,1.,0.,strkey +18,-0.924330981367,0.349602834684,3.21762413294,4.72803587499,5.94918925767,1.,0.,strkey +19,0.253239387885,0.345158023497,3.11071425333,4.79311566935,5.9489259713,1.,0.,strkey +20,0.637408390225,0.698996675371,3.25232492145,4.73814732384,5.9612010251,1.,0.,strkey +21,-0.407396859412,1.17456342803,2.49526823723,4.59323415742,5.82501686811,1.,0.,strkey 
+22,-0.967485452118,1.66655933642,2.47284606244,4.58316034754,5.88721406681,1.,0.,strkey +23,0.474480867904,1.95018556323,2.0228950072,4.48651142819,5.8255943735,1.,0.,strkey +24,1.04309652155,2.23519892356,1.91924131572,4.19094661783,5.87457348436,1.,0.,strkey +25,-0.517861513772,2.12501967336,1.70266619979,4.05280882887,5.72160912899,1.,0.,strkey +26,-0.945301585146,1.65464653549,1.81567174251,3.92309850635,5.58270493814,1.,0.,strkey +27,0.501153868974,1.40600764889,1.53991387719,3.72853247942,5.60169001727,1.,0.,strkey +28,0.972859524418,1.00344321868,1.5175642828,3.64092376655,5.10567722582,1.,0.,strkey +29,-0.70553406135,0.465306263885,1.7038540803,3.33236870312,5.09182481555,1.,0.,strkey +30,-0.946093634916,0.294539309453,1.88052827037,2.93011492669,4.97354922696,1.,0.,strkey +31,0.47922123231,0.308465865031,2.03445883031,2.90772899045,4.86241793548,1.,0.,strkey +32,0.754030014252,0.549752241167,2.46115815089,2.95063349534,4.71834614627,1.,0.,strkey +33,-0.64875949826,0.894615488148,2.5922463381,2.81269864022,4.43480095104,1.,0.,strkey +34,-0.757829951086,1.39123914261,2.69258079904,2.61834837315,4.36580046156,1.,0.,strkey +35,0.565653301088,1.72360022693,2.97794913834,2.80403840334,4.27327248459,1.,0.,strkey +36,0.867440092372,2.21100730052,3.38648090792,2.84057515729,4.12210169576,1.,0.,strkey +37,-0.894567758095,2.17549105818,3.45532493329,2.90446025717,4.00251740584,1.,0.,strkeyd +38,-0.715442356893,2.15105389965,3.52041791902,3.03650393392,4.12809249577,1.,0.,strkey +39,0.80671703672,1.81504564517,3.60463324866,3.00747789871,3.98440762467,1.,0.,strkey +40,0.527014790142,1.31803513865,3.43842186337,3.3332594663,4.03232406566,1.,0.,strkey +41,-0.795936862129,0.847809114454,3.09875133548,3.52863155938,3.94883924909,1.,0.,strkey +42,-0.610245806946,0.425530441018,2.92581949152,3.77238736123,4.27287245021,1.,0.,strkey +43,0.611662279431,0.178432049837,2.48128214822,3.73212087883,4.17319013831,1.,0.,strkey 
+44,0.650866553108,0.220341648392,2.41694642022,4.2609098519,4.27271645905,1.,0.,strkey +45,-0.774156982023,0.632667602331,2.05474356052,4.32889204886,4.18029723271,1.,0.,strkey +46,-0.714058448409,0.924562377599,1.75706135146,4.52492718422,4.3972678094,1.,0.,strkey +47,0.889627293379,1.46207968841,1.78299357672,4.64466731095,4.56317887554,1.,0.,strkey +48,0.520140662861,1.8996333843,1.41377633823,4.48899091177,4.78805049769,1.,0.,strkey +49,-1.03816935616,2.08997002059,1.51218375351,4.84167764204,4.93026048606,1.,0.,strkey +50,-0.40772951362,2.30878972136,1.44144415128,4.76854460997,5.01538444629,1.,0.,strkey +51,0.792730684781,1.91367048509,1.58887384677,4.71739397335,5.25690012199,1.,0.,strkey +52,0.371311881576,1.67565079528,1.81688563053,4.60353107555,5.44265822961,1.,0.,strkey +53,-0.814398070371,1.13374634126,1.80328814859,4.72264252878,5.52674761122,1.,0.,strkey +54,-0.469017949323,0.601244136627,2.29690896736,4.49859178859,5.54126153454,1.,0.,strkey +55,0.871044371426,0.407597593794,2.7499112487,4.19060637761,5.57693767301,1.,0.,strkey +56,0.523764933017,0.247705192709,3.09002071379,4.02095509006,5.80510362182,1.,0.,strkey +57,-0.881326403531,0.31513103164,3.11358205718,3.96079100808,5.81000652365,1.,0.,strkey +58,-0.357928025339,0.486163915865,3.17884556771,3.72634990659,5.85693642011,1.,0.,strkey +59,0.853038779822,1.04218094475,3.45835384454,3.36703969978,5.9585988449,1.,0.,strkey +60,0.435311516013,1.59715085283,3.63313338588,3.11276729421,5.93643818229,1.,0.,strkey +61,-1.02703719138,1.92205832542,3.47606111735,3.06247155999,6.02106646259,1.,0.,strkey +62,-0.246661325557,2.14653802542,3.29446326567,2.89936259181,5.67531541272,1.,0.,strkey +63,1.02554736569,2.25943737733,3.07031591528,2.78176218013,5.78206328989,1.,0.,strkey +64,0.337814475969,2.07589147224,2.80356226089,2.55888206331,5.7094075496,1.,0.,strkey +65,-1.12023369929,1.25333011618,2.56497288445,2.77361359194,5.50799418376,1.,0.,strkey 
+66,-0.178980246554,1.11937139901,2.51598681313,2.91438309151,5.47469577206,1.,0.,strkey +67,0.97550951531,0.60553823137,2.11657741073,2.88081098981,5.37034999502,1.,0.,strkey +68,0.136653357206,0.365828836075,1.97386033165,3.13217903204,5.07254490219,1.,0.,strkey +69,-1.05607596951,0.153152115069,1.52110743825,3.01308794192,5.08902539125,1.,0.,strkey +70,-0.13095280331,0.337113974483,1.52703079853,3.16687131599,4.86649398514,1.,0.,strkey +71,1.07081057754,0.714247566736,1.53761382634,3.45151989484,4.75892309166,1.,0.,strkey +72,0.0153410376082,1.24631231847,1.61690939161,3.85481994498,4.35683752832,1.,0.,strkey +73,-0.912801257303,1.60791309476,1.8729264524,4.03037260012,4.36072588913,1.,0.,strkey +74,-0.0894895640338,2.02535207407,1.93484909619,4.09557485132,4.35327025188,1.,0.,strkey +75,0.978646999652,2.20085086625,2.09003440427,4.27542353033,4.1805058388,1.,0.,strkey +76,-0.113312642876,2.2444100761,2.50789248839,4.4151861502,4.03267168136,1.,0.,strkey +77,-1.00215099149,1.84305628445,2.61691237246,4.45425147595,3.81203553766,1.,0.,strkey +78,-0.0183234614205,1.49573923116,2.99308471214,4.71134960112,4.0273804959,1.,0.,strkey +79,1.0823738177,1.12211589848,3.27079386925,4.94288270502,4.01851068083,1.,0.,strkey +80,0.124370187893,0.616474412808,3.4284236674,4.76942168327,3.9749536483,1.,0.,strkey +81,-0.929423379352,0.290977090976,3.34131726136,4.78590392707,4.10190661656,1.,0.,strkey +82,0.23766302648,0.155302052254,3.49779513794,4.64605656795,4.15571321107,1.,0.,strkey +83,1.03531486192,0.359702776204,3.4880725919,4.48167586667,4.21134561991,1.,0.,strkey +84,-0.261234571382,0.713877760378,3.42756426614,4.426443869,4.25208300527,1.,0.,strkey +85,-1.03572442277,1.25001113691,2.96908341113,4.25500915322,4.25723010649,1.,0.,strkey +86,0.380034261243,1.70543355622,2.73605932518,4.16703432307,4.63700400788,1.,0.,strkey +87,1.03734873488,1.97544410562,2.55586572141,3.84976673263,4.55282864289,1.,0.,strkey 
+88,-0.177344253372,2.22614526325,2.09565864891,3.77378097953,4.82577400298,1.,0.,strkey +89,-0.976821526892,2.18385079177,1.78522284118,3.67768223554,5.06302440873,1.,0.,strkey +90,0.264820472091,1.86981946157,1.50048403865,3.43619796921,5.05651761669,1.,0.,strkey +91,1.05642344868,1.47568646076,1.51347671977,3.20898518885,5.50149047462,1.,0.,strkey +92,-0.311607433358,1.04226467636,1.52089650905,3.02291865417,5.4889046232,1.,0.,strkey +93,-0.724285777937,0.553052311957,1.48573560173,2.7365973598,5.72549174225,1.,0.,strkey +94,0.519859192905,0.226520626591,1.61543723167,2.84102086852,5.69330622288,1.,0.,strkey +95,1.0323195039,0.260873217055,1.81913034804,2.83951143848,5.90325028086,1.,0.,strkey +96,-0.53285682538,0.387695521405,1.70935609313,2.57977050631,5.79579213161,1.,0.,strkey +97,-0.975127997215,0.920948771589,2.51292643636,2.71004616612,5.87016469227,1.,0.,strkey +98,0.540246804099,1.36445470181,2.61949412896,2.98482553485,6.02447664937,1.,0.,strkey +99,0.987764008058,1.85581989607,2.84685706149,2.94760204892,6.0212151724,1.,0.,strkey diff --git a/tensorflow/contrib/timeseries/examples/known_anomaly.py b/tensorflow/contrib/timeseries/examples/known_anomaly.py index 7659dd308a..c08c0b0acb 100644 --- a/tensorflow/contrib/timeseries/examples/known_anomaly.py +++ b/tensorflow/contrib/timeseries/examples/known_anomaly.py @@ -46,12 +46,12 @@ def train_and_evaluate_exogenous(csv_file_name=_DATA_FILE, train_steps=300): # Indicate the format of our exogenous feature, in this case a string # representing a boolean value. - string_feature = tf.contrib.layers.sparse_column_with_keys( - column_name="is_changepoint", keys=["no", "yes"]) + string_feature = tf.feature_column.categorical_column_with_vocabulary_list( + key="is_changepoint", vocabulary_list=["no", "yes"]) # Specify the way this feature is presented to the model, here using a one-hot # encoding. 
- one_hot_feature = tf.contrib.layers.one_hot_column( - sparse_id_column=string_feature) + one_hot_feature = tf.feature_column.indicator_column( + categorical_column=string_feature) estimator = tf.contrib.timeseries.StructuralEnsembleRegressor( periodicities=12, diff --git a/tensorflow/contrib/timeseries/examples/lstm.py b/tensorflow/contrib/timeseries/examples/lstm.py index f37cafcc50..2eee878196 100644 --- a/tensorflow/contrib/timeseries/examples/lstm.py +++ b/tensorflow/contrib/timeseries/examples/lstm.py @@ -59,10 +59,10 @@ class _LSTMModel(ts_model.SequentialTimeSeriesModel): num_units: The number of units in the model's LSTMCell. num_features: The dimensionality of the time series (features per timestep). - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects representing features which are inputs to the model but are - not predicted by it. These must then be present for training, - evaluation, and prediction. + exogenous_feature_columns: A list of `tf.feature_column`s representing + features which are inputs to the model but are not predicted by + it. These must then be present for training, evaluation, and + prediction. dtype: The floating point data type to use. """ super(_LSTMModel, self).__init__( @@ -189,12 +189,16 @@ def train_and_predict( export_directory=None): """Train and predict using a custom time series model.""" # Construct an Estimator from our LSTM model. + categorical_column = tf.feature_column.categorical_column_with_hash_bucket( + key="categorical_exogenous_feature", hash_bucket_size=16) exogenous_feature_columns = [ # Exogenous features are not part of the loss, but can inform # predictions. In this example the features have no extra information, but # are included as an API example. 
- tf.contrib.layers.real_valued_column( - "2d_exogenous_feature", dimension=2)] + tf.feature_column.numeric_column( + "2d_exogenous_feature", shape=(2,)), + tf.feature_column.embedding_column( + categorical_column=categorical_column, dimension=10)] estimator = ts_estimators.TimeSeriesRegressor( model=_LSTMModel(num_features=5, num_units=128, exogenous_feature_columns=exogenous_feature_columns), @@ -205,7 +209,11 @@ def train_and_predict( csv_file_name, column_names=((tf.contrib.timeseries.TrainEvalFeatures.TIMES,) + (tf.contrib.timeseries.TrainEvalFeatures.VALUES,) * 5 - + ("2d_exogenous_feature",) * 2)) + + ("2d_exogenous_feature",) * 2 + + ("categorical_exogenous_feature",)), + # Data types other than for `times` need to be specified if they aren't + # float32. In this case one of our exogenous features has string dtype. + column_dtypes=((tf.int64,) + (tf.float32,) * 7 + (tf.string,))) train_input_fn = tf.contrib.timeseries.RandomWindowInputFn( reader, batch_size=4, window_size=32) estimator.train(input_fn=train_input_fn, steps=training_steps) @@ -215,7 +223,9 @@ def train_and_predict( predict_exogenous_features = { "2d_exogenous_feature": numpy.concatenate( [numpy.ones([1, 100, 1]), numpy.zeros([1, 100, 1])], - axis=-1)} + axis=-1), + "categorical_exogenous_feature": numpy.array( + ["strkey"] * 100)[None, :, None]} (predictions,) = tuple(estimator.predict( input_fn=tf.contrib.timeseries.predict_continuation_input_fn( evaluation, steps=100, diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators.py b/tensorflow/contrib/timeseries/python/timeseries/estimators.py index f8355f366f..8d13343e82 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.layers.python.layers import feature_column - from 
tensorflow.contrib.timeseries.python.timeseries import ar_model from tensorflow.contrib.timeseries.python.timeseries import feature_keys from tensorflow.contrib.timeseries.python.timeseries import head as ts_head_lib @@ -31,10 +29,12 @@ from tensorflow.contrib.timeseries.python.timeseries.state_space_models.filterin from tensorflow.python.estimator import estimator_lib from tensorflow.python.estimator.export import export_lib +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.training import training as train @@ -117,22 +117,29 @@ class TimeSeriesRegressor(estimator_lib.Estimator): dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) - with ops.Graph().as_default(): - # Default placeholders have only an unknown batch dimension. Make them - # in a separate graph, then splice in the series length to the shapes - # and re-create them in the outer graph. - exogenous_feature_shapes = { - key: (value.get_shape(), value.dtype) for key, value - in feature_column.make_place_holder_tensors_for_base_features( - self._model.exogenous_feature_columns).items()} - for feature_key, (batch_only_feature_shape, value_dtype) in ( - exogenous_feature_shapes.items()): - batch_only_feature_shape = batch_only_feature_shape.with_rank_at_least( - 1).as_list() - feature_shape = ([default_batch_size, default_series_length] - + batch_only_feature_shape[1:]) - placeholders[feature_key] = array_ops.placeholder( - dtype=value_dtype, name=feature_key, shape=feature_shape) + if self._model.exogenous_feature_columns: + with ops.Graph().as_default(): + # Default placeholders have only an unknown batch dimension. 
Make them + # in a separate graph, then splice in the series length to the shapes + # and re-create them in the outer graph. + parsed_features = ( + feature_column.make_parse_example_spec( + self._model.exogenous_feature_columns)) + placeholder_features = parsing_ops.parse_example( + serialized=array_ops.placeholder( + shape=[None], dtype=dtypes.string), + features=parsed_features) + exogenous_feature_shapes = { + key: (value.get_shape(), value.dtype) for key, value + in placeholder_features.items()} + for feature_key, (batch_only_feature_shape, value_dtype) in ( + exogenous_feature_shapes.items()): + batch_only_feature_shape = ( + batch_only_feature_shape.with_rank_at_least(1).as_list()) + feature_shape = ([default_batch_size, default_series_length] + + batch_only_feature_shape[1:]) + placeholders[feature_key] = array_ops.placeholder( + dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. @@ -333,11 +340,11 @@ class StructuralEnsembleRegressor(StateSpaceRegressor): determine the model size. Learning autoregressive coefficients typically requires more steps and a smaller step size than other components. - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. + exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not part + of the series to be predicted. Passed to + `tf.feature_column.input_layer`. 
exogenous_update_condition: A function taking two Tensor arguments, `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]), and diff --git a/tensorflow/contrib/timeseries/python/timeseries/model.py b/tensorflow/contrib/timeseries/python/timeseries/model.py index bac7d1ebf5..7644764a74 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/model.py @@ -21,18 +21,17 @@ from __future__ import print_function import abc import collections -from tensorflow.contrib import layers -from tensorflow.contrib.layers import feature_column - from tensorflow.contrib.timeseries.python.timeseries import math_utils from tensorflow.contrib.timeseries.python.timeseries.feature_keys import PredictionFeatures from tensorflow.contrib.timeseries.python.timeseries.feature_keys import TrainEvalFeatures +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variable_scope @@ -66,11 +65,11 @@ class TimeSeriesModel(object): Args: num_features: Number of features for the time series - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. 
+ exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not + part of the series to be predicted. Passed to + `tf.feature_column.input_layer`. dtype: The floating point datatype to use. """ if exogenous_feature_columns: @@ -86,7 +85,7 @@ class TimeSeriesModel(object): @property def exogenous_feature_columns(self): - """`FeatureColumn` objects for features which are not predicted.""" + """`tf.feature_column`s for features which are not predicted.""" return self._exogenous_feature_columns # TODO(allenl): Move more of the generic machinery for generating and @@ -265,11 +264,14 @@ class TimeSeriesModel(object): if not self._exogenous_feature_columns: return (0,) with ops.Graph().as_default(): - placeholder_features = ( - feature_column.make_place_holder_tensors_for_base_features( + parsed_features = ( + feature_column.make_parse_example_spec( self._exogenous_feature_columns)) - embedded = layers.input_from_feature_columns( - columns_to_tensors=placeholder_features, + placeholder_features = parsing_ops.parse_example( + serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string), + features=parsed_features) + embedded = feature_column.input_layer( + features=placeholder_features, feature_columns=self._exogenous_feature_columns) return embedded.get_shape().as_list()[1:] @@ -308,13 +310,13 @@ class TimeSeriesModel(object): # Avoid shape warnings when embedding "scalar" exogenous features (those # with only batch and window dimensions); input_from_feature_columns # expects input ranks to match the embedded rank.
- if tensor.get_shape().ndims == 1: + if tensor.get_shape().ndims == 1 and tensor.dtype != dtypes.string: exogenous_features_single_batch_dimension[name] = tensor[:, None] else: exogenous_features_single_batch_dimension[name] = tensor embedded_exogenous_features_single_batch_dimension = ( - layers.input_from_feature_columns( - columns_to_tensors=exogenous_features_single_batch_dimension, + feature_column.input_layer( + features=exogenous_features_single_batch_dimension, feature_columns=self._exogenous_feature_columns, trainable=True)) exogenous_regressors = array_ops.reshape( @@ -381,8 +383,8 @@ class SequentialTimeSeriesModel(TimeSeriesModel): may use _scale_back_data or _scale_back_variance to return predictions to the input scale. dtype: The floating point datatype to use. - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects. See `TimeSeriesModel`. + exogenous_feature_columns: A list of `tf.feature_column`s objects. See + `TimeSeriesModel`. exogenous_update_condition: A function taking two Tensor arguments `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]) and returning a diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py index 6257002647..951c6546d5 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py @@ -112,11 +112,11 @@ class StateSpaceModelConfiguration( exogenous_noise_decreases: If True, exogenous regressors can "set" model state, decreasing uncertainty. If both this parameter and exogenous_noise_increases are False, exogenous regressors are ignored. 
- exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. + exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not part + of the series to be predicted. Passed to + `tf.feature_column.input_layer`. exogenous_update_condition: A function taking two Tensor arguments `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]) and returning a -- GitLab From 7b944492cbe1ac81ea728ecb84ce4ea272627990 Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Mon, 26 Feb 2018 14:11:08 -0800 Subject: [PATCH 098/311] Adding documentation for dataset/iterator checkpointing. PiperOrigin-RevId: 187078347 --- .../docs_src/programmers_guide/datasets.md | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index d19200e80c..d38fbddfa1 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -327,6 +327,35 @@ same op/node (created by `Iterator.get_next()`). Therefore, evaluating *any* of these tensors will advance the iterator for all components. A typical consumer of an iterator will include all components in a single expression. +### Saving iterator state + +The @{tf.contrib.data.make_saveable_from_iterator} function creates a +`SaveableObject` from an iterator, which can be used to save and +restore the current state of the iterator (and, effectively, the whole input +pipeline). 
A saveable object thus created can be added to @{tf.train.Saver} +variables list or the `tf.GraphKeys.SAVEABLE_OBJECTS` collection for saving and +restoring in the same manner as a @{tf.Variable}. Refer to +@{$saved_model$Saving and Restoring} for details on how to save and restore +variables. + +```python +# Create saveable object from iterator. +saveable = tf.contrib.data.make_saveable_from_iterator(iterator) + +# Save the iterator state by adding it to the saveable objects collection. +tf.add_to_collection(tf.GraphKeys.SAVEABLE_OBJECTS, saveable) +saver = tf.train.Saver() + +with tf.Session() as sess: + + if should_checkpoint: + saver.save(sess, path_to_checkpoint) + +# Restore the iterator state. +with tf.Session() as sess: + saver.restore(sess, path_to_checkpoint) +``` + ## Reading input data ### Consuming NumPy arrays -- GitLab From 10aaee0c5d83649959d8b1a6c75ee3127c205259 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 26 Feb 2018 14:19:56 -0800 Subject: [PATCH 099/311] [XLA] GTE of a certain element of the tuple does not need not keep other elements alive. This achieves two things: 1. Heap simulation runtime is no longer quadratic in the number of tuple elements (as we don't add each GetTupleElement to the liveset of each buffer defined by the tuple). 2. A reduction in the heap memory footprint.
PiperOrigin-RevId: 187079787 --- .../compiler/xla/service/heap_simulator.cc | 135 ++++++++++-------- .../xla/service/heap_simulator_test.cc | 50 +++++++ 2 files changed, 127 insertions(+), 58 deletions(-) diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc index a2d13c013c..3dd4c4a079 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.cc +++ b/tensorflow/compiler/xla/service/heap_simulator.cc @@ -27,38 +27,6 @@ namespace xla { using tensorflow::gtl::FlatMap; using tensorflow::gtl::FlatSet; -namespace { - -// Returns the set of buffers that may be sources of all operands of the given -// instruction. The returned buffers are guaranteed to have no duplicates, and -// to be sorted in a deterministic order. -std::vector UniqueOperandSourceBuffers( - const HloInstruction* instruction, - const TuplePointsToAnalysis& points_to_analysis) { - std::vector buffers; - for (const HloInstruction* operand : instruction->operands()) { - points_to_analysis.GetPointsToSet(operand).ForEachElement( - [&](const ShapeIndex& /*index*/, - const PointsToSet::BufferList& points_to) { - buffers.insert(buffers.end(), points_to.begin(), points_to.end()); - }); - } - - // Sort and then remove duplicates from buffers. 
- std::sort(buffers.begin(), buffers.end(), - [](const LogicalBuffer* a, const LogicalBuffer* b) { - return a->id() < b->id(); - }); - buffers.erase(std::unique(buffers.begin(), buffers.end(), - [](const LogicalBuffer* a, const LogicalBuffer* b) { - return a->id() == b->id(); - }), - buffers.end()); - return buffers; -} - -} // namespace - /*static*/ StatusOr HeapSimulator::Run( std::unique_ptr algorithm, const HloModule& module, @@ -93,6 +61,7 @@ Status HeapSimulator::RunComputation( const HloComputation& computation, const std::vector& instruction_sequence, const TuplePointsToAnalysis& points_to_analysis) { + VLOG(3) << "Computation:\n" << computation.ToString(); // The goal here is to minimize memory usage, assuming the given sequential // ordering of instructions. The strategy is to walk through the instruction // sequence, calling Alloc and Free on the underlying heap algorithm. The @@ -101,7 +70,51 @@ Status HeapSimulator::RunComputation( // 'live_buffers' tracks the liveness of each buffer that we assign, by // associating it with a set of HloInstructions that need to be visited. When // the set becomes empty, the buffer is no longer used, and can be freed. + // 'used_buffers' is the reverse map - it tracks which buffers were used by an + // instruction, so that we can remove the instructions from a buffer's live + // set after they are visited. FlatMap> live_buffers; + FlatMap> used_buffers; + auto add_user_to_buffer = [this, &live_buffers, &used_buffers]( + const HloInstruction* user, + const LogicalBuffer* buffer) { + if (!IgnoreBuffer(buffer)) { + VLOG(4) << " Adding user " << user->name() << " to buffer " + << buffer->ToString(); + live_buffers[buffer].insert(user); + used_buffers[user].insert(buffer); + } + }; + + // Initialize live_buffers for each buffer that we're going to assign. 
The + // set of instructions that need to be visited contains all users of all + // aliases, that is, all users of all instructions that have the buffer + // contained in their points-to set. + for (const HloInstruction* instruction : instruction_sequence) { + const PointsToSet& points_to = + points_to_analysis.GetPointsToSet(instruction); + const PointsToSet::BufferSet& buffer_set = points_to.CreateFlattenedSet(); + for (const HloInstruction* user : instruction->users()) { + if (user->opcode() != HloOpcode::kGetTupleElement) { + for (const LogicalBuffer* buffer : buffer_set) { + add_user_to_buffer(user, buffer); + } + } else { + // A GetTupleElement doesn't need to keep all of its operand's buffers + // alive. It only needs the buffers that relate to the element its + // extracting, and the tuple it's extracting from, but not the buffers + // for the other elements. + for (const LogicalBuffer* buffer : points_to.element({})) { + add_user_to_buffer(user, buffer); + } + const PointsToSet& gte_points_to = + points_to_analysis.GetPointsToSet(user); + for (const LogicalBuffer* buffer : gte_points_to.CreateFlattenedSet()) { + add_user_to_buffer(user, buffer); + } + } + } + } const HloInstruction* root = computation.root_instruction(); auto output_source_buffers = @@ -114,34 +127,17 @@ Status HeapSimulator::RunComputation( buffers_defined_by_instruction = points_to_analysis.GetBuffersDefinedByInstruction(instruction); - // Initialize live_buffers for each buffer that we're going to assign. The - // set of instructions that need to be visited contains all users of all - // aliases. The alias itself is not necessary; if it has users, the users - // are necessarily scheduled after the alias. And if it has no users, it is - // either a dead value or an output, both of which are handled below. - // - // We ignore control dependencies here. 
The reasoning is that the control - // dependencies have already been accounted for in the ordering of the given - // 'instruction_sequence', and should not otherwise artificially extend the - // lifetime of buffers that aren't already connected by a data dependency. + VLOG(3) << "Instruction: " << instruction->ToString(); + for (const LogicalBuffer* buffer : buffers_defined_by_instruction) { + VLOG(4) << " Defines: " << buffer->ToString() + << (IgnoreBuffer(buffer) ? " (Ignored)" : ""); + } + dead_buffers_to_free.clear(); for (const LogicalBuffer* buffer : buffers_defined_by_instruction) { if (IgnoreBuffer(buffer)) { continue; } - FlatSet* live_set = nullptr; - for (const BufferAlias& alias : - points_to_analysis.GetBufferAliases(*buffer)) { - const std::vector& users = - alias.instruction()->users(); - if (!users.empty()) { - if (live_set == nullptr) { - live_set = &live_buffers[buffer]; - } - live_set->insert(users.begin(), users.end()); - } - } - // Add a nullptr sentry to ensure entry parameters and output source // buffers are not freed until the very end. const bool entry_parameter = @@ -165,11 +161,12 @@ Status HeapSimulator::RunComputation( // have no instructions left to visit are moved from live_buffers to // operand_buffers_to_free. operand_buffers_to_free.clear(); - for (const LogicalBuffer* operand_buffer : - UniqueOperandSourceBuffers(instruction, points_to_analysis)) { + for (const LogicalBuffer* operand_buffer : used_buffers[instruction]) { if (IgnoreBuffer(operand_buffer)) { continue; } + VLOG(4) << " Removing user " << instruction->name() << " from buffer " + << operand_buffer->ToString(); auto it = live_buffers.find(operand_buffer); FlatSet* live_set = &it->second; live_set->erase(instruction); @@ -178,6 +175,11 @@ Status HeapSimulator::RunComputation( operand_buffers_to_free.push_back(operand_buffer); } } + // Sort to get a deterministic iteration order. 
+ std::sort(operand_buffers_to_free.begin(), operand_buffers_to_free.end(), + [](const LogicalBuffer* x, const LogicalBuffer* y) { + return x->id() < y->id(); + }); // Allocate buffers defined by this instruction. This is the latest point // that we can allocate; right before the buffer is first used. This must @@ -203,6 +205,8 @@ Status HeapSimulator::RunComputation( CanShareOperandBufferWithUser( operand_buffer->instruction(), operand_buffer->index(), buffer->instruction(), buffer->index(), points_to_analysis)) { + VLOG(3) << " Sharing: " << buffer->ToString() << " with " + << operand_buffer->ToString(); ShareBuffer(buffer, operand_buffer, instruction); shared = true; break; @@ -211,6 +215,7 @@ Status HeapSimulator::RunComputation( } if (!shared) { + VLOG(3) << " Allocating: " << buffer->ToString(); Alloc(buffer, instruction); } } @@ -244,20 +249,34 @@ Status HeapSimulator::RunComputation( // Free buffers that are no longer live. This is the earliest point that we // can de-allocate; right after the last use of the buffer. for (const LogicalBuffer* buffer : dead_buffers_to_free) { + VLOG(3) << " Freeing dead: " << buffer->ToString(); Free(buffer, instruction); } for (const LogicalBuffer* buffer : operand_buffers_to_free) { + VLOG(3) << " Freeing operand: " << buffer->ToString(); Free(buffer, instruction); } } // Any remaining live buffers must be entry parameters or output source - // buffers, which had a nullptr sentry added. Free them now. + // buffers, which had a nullptr sentry added. Free them now, in a + // deterministic order. 
+ std::vector to_free; + to_free.reserve(live_buffers.size()); for (const auto& buffer_pending : live_buffers) { const LogicalBuffer* buffer = buffer_pending.first; const FlatSet& pending = buffer_pending.second; CHECK_EQ(pending.size(), 1) << *buffer; CHECK(*pending.begin() == nullptr) << *buffer; + to_free.push_back(buffer); + } + + std::sort(to_free.begin(), to_free.end(), + [](const LogicalBuffer* x, const LogicalBuffer* y) { + return x->id() < y->id(); + }); + for (const LogicalBuffer* buffer : to_free) { + VLOG(3) << "Freeing pending: " << buffer->ToString(); Free(buffer, root); } diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc index 387b649a73..688a271712 100644 --- a/tensorflow/compiler/xla/service/heap_simulator_test.cc +++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc @@ -410,6 +410,56 @@ TEST_F(HeapSimulatorTest, MultiplyDotDotTuple) { }); } +TEST_F(HeapSimulatorTest, IndependentTupleElements) { + auto builder = HloComputation::Builder(TestName()); + auto paramA = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32scalar_, "paramA")); + auto paramB = builder.AddInstruction( + HloInstruction::CreateParameter(1, f32scalar_, "paramB")); + auto mul = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kMultiply, paramA, paramB)); + auto add = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kAdd, paramA, paramB)); + auto tuple = builder.AddInstruction(HloInstruction::CreateTuple({mul, add})); + auto element0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(f32scalar_, tuple, 0)); + auto broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(f32vec4_, element0, {0})); + auto sub = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kSubtract, paramA, paramB)); + auto element1 = builder.AddInstruction( + 
HloInstruction::CreateGetTupleElement(f32scalar_, tuple, 1)); + auto output = builder.AddInstruction( + HloInstruction::CreateTuple({broadcast, sub, element1})); + + HeapSimulatorTracker tracker(TestName(), builder.Build(), + {paramA, paramB, mul, add, tuple, element0, + broadcast, sub, element1, output}); + tracker.ExpectCallSequence({ + {kAlloc, tracker.BufferAt(paramA, {})}, + {kAlloc, tracker.BufferAt(paramB, {})}, + {kAlloc, tracker.BufferAt(mul, {})}, + {kAlloc, tracker.BufferAt(add, {})}, + {kAlloc, tracker.BufferAt(tuple, {})}, + {kAlloc, tracker.BufferAt(broadcast, {})}, + // The mul can be freed right after the broadcast happens, even though + // The other GetTupleElement is still alive. + {kFree, tracker.BufferAt(mul, {})}, + {kAlloc, tracker.BufferAt(sub, {})}, + // The temporary tuple is now dead. + {kFree, tracker.BufferAt(tuple, {})}, + {kAlloc, tracker.BufferAt(output, {})}, + // All params and outputs are freed at the end. + {kFree, tracker.BufferAt(paramA, {})}, + {kFree, tracker.BufferAt(paramB, {})}, + {kFree, tracker.BufferAt(add, {})}, + {kFree, tracker.BufferAt(broadcast, {})}, + {kFree, tracker.BufferAt(sub, {})}, + {kFree, tracker.BufferAt(output, {})}, + {kFinish, nullptr}, + }); +} + TEST_F(HeapSimulatorTest, WholeModule) { HeapSimulatorTracker tracker(TestName()); -- GitLab From c3ad72500cd714a39af5ab530ab14f477cc717c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 14:25:30 -0800 Subject: [PATCH 100/311] 1st version of sequential feature columns. 
PiperOrigin-RevId: 187080635 --- tensorflow/contrib/feature_column/BUILD | 31 +- .../sequential_feature_column.py | 308 +++++++++++- .../sequential_feature_column_test.py | 471 ++++++++++++++++++ 3 files changed, 808 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index 6fc053759c..a53e36c2d5 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -33,5 +33,34 @@ py_library( name = "sequential_feature_column", srcs = ["python/feature_column/sequential_feature_column.py"], srcs_version = "PY2AND3", - deps = [], + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:tensor_shape", + "//tensorflow/python:variable_scope", + "//tensorflow/python/feature_column", + ], +) + +py_test( + name = "sequential_feature_column_test", + srcs = ["python/feature_column/sequential_feature_column_test.py"], + srcs_version = "PY2AND3", + tags = ["no_pip"], + deps = [ + ":sequential_feature_column", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:training", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + ], ) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py index 690a44ff43..4ed7268e7a 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py +++ 
b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py @@ -12,8 +12,314 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experimental methods for tf.feature_column sequential input.""" +"""Experimental methods for tf.feature_column sequence input.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function + + +import abc +import collections + + +from tensorflow.python.feature_column import feature_column as fc +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import variable_scope + +# TODO(b/73160931): Fix pydoc. +# pylint: disable=g-doc-args,missing-docstring,protected-access +# TODO(b/73827486): Support SequenceExample. + + +def sequence_input_layer( + features, + feature_columns, + weight_collections=None, + trainable=True, + scope=None): + """Builds input layer for sequence input. + + All `feature_columns` must be sequence dense columns with the same + `sequence_length`. The output of this method can be fed into sequence + networks, such as RNN. + + The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could differ from + batch to batch. + + If multiple `feature_columns` are given with `Di` `num_elements` each, their + outputs are concatenated. So, the final `Tensor` has shape + `[batch_size, T, D0 + D1 + ... + Dn]`.
+ + Example: + + ```python + rating = sequence_numeric_column('rating') + watches = sequence_categorical_column_with_identity( + 'watches', num_buckets=1000) + watches_embedding = embedding_column(watches, dimension=10) + columns = [rating, watches] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Returns: + An `(input_layer, sequence_length)` tuple where: + - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could differ + from batch to batch. `D` is the sum of `num_elements` for all + `feature_columns`. + - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence + length for each example. + Raises: + ValueError: If any of the `feature_columns` is the wrong type. + """ + feature_columns = fc._clean_feature_columns(feature_columns) + for c in feature_columns: + if not isinstance(c, _SequenceDenseColumn): + raise ValueError( + 'All feature_columns must be of type _SequenceDenseColumn. ' + 'Given (type {}): {}'.format(type(c), c)) + + with variable_scope.variable_scope( + scope, default_name='sequence_input_layer', values=features.values()): + builder = fc._LazyBuilder(features) + output_tensors = [] + sequence_lengths = [] + ordered_columns = [] + for column in sorted(feature_columns, key=lambda x: x.name): + ordered_columns.append(column) + with variable_scope.variable_scope( + None, default_name=column._var_scope_name): + dense_tensor, sequence_length = column._get_sequence_dense_tensor( + builder, + weight_collections=weight_collections, + trainable=trainable) + # Flattens the final dimension to produce a 3D Tensor. 
+ num_elements = column._variable_shape.num_elements() + shape = array_ops.shape(dense_tensor) + output_tensors.append( + array_ops.reshape( + dense_tensor, + shape=array_ops.concat([shape[:2], [num_elements]], axis=0))) + sequence_lengths.append(sequence_length) + fc._verify_static_batch_size_equality(output_tensors, ordered_columns) + # TODO(b/73160931): Verify sequence_length equality. + return array_ops.concat(output_tensors, -1), sequence_lengths[0] + + +# TODO(b/73160931): Add remaining categorical columns. +def sequence_categorical_column_with_identity( + key, num_buckets, default_value=None): + return _SequenceCategoricalColumn( + fc.categorical_column_with_identity( + key=key, + num_buckets=num_buckets, + default_value=default_value)) + + +# TODO(b/73160931): Merge with embedding_column +def _sequence_embedding_column( + categorical_column, dimension, initializer=None, ckpt_to_load_from=None, + tensor_name_in_ckpt=None, max_norm=None, trainable=True): + if not isinstance(categorical_column, _SequenceCategoricalColumn): + raise ValueError( + 'categorical_column must be of type _SequenceCategoricalColumn. ' + 'Given (type {}): {}'.format( + type(categorical_column), categorical_column)) + return _SequenceEmbeddingColumn( + fc.embedding_column( + categorical_column, + dimension=dimension, + initializer=initializer, + ckpt_to_load_from=ckpt_to_load_from, + tensor_name_in_ckpt=tensor_name_in_ckpt, + max_norm=max_norm, + trainable=trainable)) + + +def sequence_numeric_column( + key, + shape=(1,), + default_value=0., + dtype=dtypes.float32): + # TODO(b/73160931): Add validations. 
+ return _SequenceNumericColumn( + key, + shape=shape, + default_value=default_value, + dtype=dtype) + + +class _SequenceDenseColumn(fc._FeatureColumn): + """Represents dense sequence data.""" + + __metaclass__ = abc.ABCMeta + + TensorSequenceLengthPair = collections.namedtuple( # pylint: disable=invalid-name + 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) + + @abc.abstractproperty + def _variable_shape(self): + """`TensorShape` without batch and sequence dimensions.""" + pass + + @abc.abstractmethod + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + """Returns a `TensorSequenceLengthPair`.""" + pass + + +def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): + with ops.name_scope(None, 'sequence_length') as name_scope: + row_ids = sp_tensor.indices[:, 0] + column_ids = sp_tensor.indices[:, 1] + column_ids += array_ops.ones_like(column_ids) + seq_length = ( + math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) + # If the last n rows do not have ids, seq_length will have shape + # [batch_size - n]. Pad the remaining values with zeros. 
+ n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1] + padding = array_ops.zeros(n_pad, dtype=seq_length.dtype) + return array_ops.concat([seq_length, padding], axis=0, name=name_scope) + + +class _SequenceCategoricalColumn( + fc._CategoricalColumn, + collections.namedtuple( + '_SequenceCategoricalColumn', ['categorical_column'])): + + @property + def name(self): + return self.categorical_column.name + + @property + def _parse_example_spec(self): + return self.categorical_column._parse_example_spec + + def _transform_feature(self, inputs): + return self.categorical_column._transform_feature(inputs) + + @property + def _num_buckets(self): + return self.categorical_column._num_buckets + + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) + id_tensor = sparse_tensors.id_tensor + weight_tensor = sparse_tensors.weight_tensor + # Expands final dimension, so that embeddings are not combined during + # embedding lookup. + check_id_rank = check_ops.assert_equal( + array_ops.rank(id_tensor), 2, + data=[ + 'Column {} expected ID tensor of rank 2. 
'.format(self.name), + 'id_tensor shape: ', array_ops.shape(id_tensor)]) + with ops.control_dependencies([check_id_rank]): + id_tensor = sparse_ops.sparse_reshape( + id_tensor, + shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0)) + if weight_tensor is not None: + check_weight_rank = check_ops.assert_equal( + array_ops.rank(weight_tensor), 2, + data=[ + 'Column {} expected weight tensor of rank 2.'.format(self.name), + 'weight_tensor shape:', array_ops.shape(weight_tensor)]) + with ops.control_dependencies([check_weight_rank]): + weight_tensor = sparse_ops.sparse_reshape( + weight_tensor, + shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0)) + return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor) + + def _sequence_length(self, inputs): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) + return _sequence_length_from_sparse_tensor(sparse_tensors.id_tensor) + + +class _SequenceEmbeddingColumn( + _SequenceDenseColumn, + collections.namedtuple('_SequenceEmbeddingColumn', ['embedding_column'])): + + @property + def name(self): + return self.embedding_column.name + + @property + def _parse_example_spec(self): + return self.embedding_column._parse_example_spec + + def _transform_feature(self, inputs): + return self.embedding_column._transform_feature(inputs) + + @property + def _variable_shape(self): + return self.embedding_column._variable_shape + + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + dense_tensor = self.embedding_column._get_dense_tensor( + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) + sequence_length = self.embedding_column.categorical_column._sequence_length( + inputs) + return _SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + + +class _SequenceNumericColumn( + _SequenceDenseColumn, + collections.namedtuple( + '_SequenceNumericColumn', + ['key', 'shape', 
'default_value', 'dtype'])): + + @property + def name(self): + return self.key + + @property + def _parse_example_spec(self): + return {self.key: parsing_ops.VarLenFeature(self.dtype)} + + def _transform_feature(self, inputs): + return inputs.get(self.key) + + @property + def _variable_shape(self): + return tensor_shape.TensorShape(self.shape) + + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + # Do nothing with weight_collections and trainable since no variables are + # created in this function. + del weight_collections + del trainable + sp_tensor = inputs.get(self) + dense_tensor = sparse_ops.sparse_tensor_to_dense( + sp_tensor, default_value=self.default_value) + # Reshape into [batch_size, T, variable_shape]. + dense_shape = array_ops.concat( + [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape], + axis=0) + dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape) + sequence_length = _sequence_length_from_sparse_tensor( + sp_tensor, num_elements=self._variable_shape.num_elements()) + return _SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + +# pylint: enable=g-doc-args,missing-docstring,protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py new file mode 100644 index 0000000000..59674869a2 --- /dev/null +++ b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py @@ -0,0 +1,471 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for sequential_feature_column.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.feature_column.python.feature_column import sequential_feature_column as sfc +from tensorflow.python.feature_column.feature_column import _LazyBuilder +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.platform import test +from tensorflow.python.training import monitored_session + + +class SequenceInputLayerTest(test.TestCase): + + def test_embedding_column(self): + vocabulary_size = 3 + sparse_input_a = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + sparse_input_b = sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [2, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + + embedding_dimension_a = 2 + embedding_values_a = ( + (1., 2.), # id 0 + (3., 4.), # id 1 + (5., 6.) # id 2 + ) + embedding_dimension_b = 3 + embedding_values_b = ( + (11., 12., 13.), # id 0 + (14., 15., 16.), # id 1 + (17., 18., 19.) 
# id 2 + ) + def _get_initializer(embedding_dimension, embedding_values): + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + return _initializer + + expected_input_layer = [ + # example 0, ids_a [2], ids_b [1] + [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [2, 0] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]], + ] + expected_sequence_length = [1, 2] + + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column_a = sfc._sequence_embedding_column( + categorical_column_a, dimension=embedding_dimension_a, + initializer=_get_initializer(embedding_dimension_a, embedding_values_a)) + categorical_column_b = sfc.sequence_categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_b = sfc._sequence_embedding_column( + categorical_column_b, dimension=embedding_dimension_b, + initializer=_get_initializer(embedding_dimension_b, embedding_values_b)) + + input_layer, sequence_length = sfc.sequence_input_layer( + features={ + 'aaa': sparse_input_a, + 'bbb': sparse_input_b, + }, + # Test that columns are reordered alphabetically. 
+ feature_columns=[embedding_column_b, embedding_column_a]) + + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ('sequence_input_layer/aaa_embedding/embedding_weights:0', + 'sequence_input_layer/bbb_embedding/embedding_weights:0'), + tuple([v.name for v in global_vars])) + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess)) + self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess)) + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_numeric_column(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_input_layer = [ + [[0.], [1.]], + [[10.], [0.]], + ] + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa') + + input_layer, sequence_length = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_numeric_column_multi_dim(self): + """Tests sequence_input_layer for multi-dimensional numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), + (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)) + # The output of numeric_column._get_dense_tensor should be flattened. 
+ expected_input_layer = [ + [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]], + ] + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + input_layer, sequence_length = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +def _assert_sparse_tensor_value(test_case, expected, actual): + test_case.assertEqual(np.int64, np.array(actual.indices).dtype) + test_case.assertAllEqual(expected.indices, actual.indices) + + test_case.assertEqual( + np.array(expected.values).dtype, np.array(actual.values).dtype) + test_case.assertAllEqual(expected.values, actual.values) + + test_case.assertEqual(np.int64, np.array(actual.dense_shape).dtype) + test_case.assertAllEqual(expected.dense_shape, actual.dense_shape) + + +class SequenceCategoricalColumnWithIdentityTest(test.TestCase): + + def test_get_sparse_tensors(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + expected_sparse_ids = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((1, 2, 0), dtype=np.int64), + dense_shape=(2, 2, 1)) + + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + + self.assertIsNone(id_weight_pair.weight_tensor) + with monitored_session.MonitoredSession() as sess: + _assert_sparse_tensor_value( + self, + expected_sparse_ids, + id_weight_pair.id_tensor.eval(session=sess)) + + def test_get_sparse_tensors_inputs3d(self): + """Tests _get_sparse_tensors when the input is already 3D Tensor.""" + column = 
sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=(1, 2, 0), + dense_shape=(2, 2, 1)) + + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'Column aaa expected ID tensor of rank 2\.\s*' + r'id_tensor shape:\s*\[2 2 1\]'): + id_weight_pair = column._get_sparse_tensors( + _LazyBuilder({'aaa': inputs})) + with monitored_session.MonitoredSession() as sess: + id_weight_pair.id_tensor.eval(session=sess) + + def test_sequence_length(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_zeros(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((1, 0), (3, 0), (3, 1)), + values=(1, 2, 0), + dense_shape=(5, 2)) + expected_sequence_length = [0, 1, 0, 2, 0] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceEmbeddingColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)) + + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 
11.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + expected_lookups = [ + # example 0, ids [2] + [[7., 11.], [0., 0.]], + # example 1, ids [0, 1] + [[1., 2.], [3., 5.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [1] + [[3., 5.], [0., 0.]], + ] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=embedding_dimension, + initializer=_initializer) + + embedding_lookup, _ = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ('embedding_weights:0',), tuple([v.name for v in global_vars])) + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess)) + self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess)) + + def test_sequence_length(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=2) + + _, sequence_length = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some 
examples do not have ids.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [] + # example 1, ids [2] + # example 2, ids [0, 1] + # example 3, ids [] + # example 4, ids [1] + # example 5, ids [] + indices=((1, 0), (2, 0), (2, 1), (4, 0)), + values=(2, 0, 1, 1), + dense_shape=(6, 2)) + expected_sequence_length = [0, 1, 2, 0, 1, 0] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=2) + + _, sequence_length = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceNumericColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_dense_tensor = [ + [[0.], [1.]], + [[10.], [0.]], + ] + numeric_column = sfc.sequence_numeric_column('aaa') + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_get_sequence_dense_tensor_with_shape(self): + """Tests get_sequence_dense_tensor with shape !=(1,).""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2.], [3., 4., 5.]] + # example 1, [[10., 11., 12.]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), + (1, 0), (1, 1), (1, 2)), + values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), + dense_shape=(2, 6)) + expected_dense_tensor = [ + [[0., 1., 2.], [3., 4., 5.]], + [[10., 11., 12.], [0., 0., 0.]], + ] + numeric_column 
= sfc.sequence_numeric_column('aaa', shape=(3,)) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_get_dense_tensor_multi_dim(self): + """Tests get_sequence_dense_tensor for multi-dim numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), + (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)) + expected_dense_tensor = [ + [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], + [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]], + ] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_sequence_length(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2.], [3., 4., 5.]] + # example 1, [[10., 11., 12.]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), + (1, 0), (1, 1), (1, 2)), + values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), + dense_shape=(2, 6)) + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_shape(self): + """Tests _sequence_length with shape !=(1,).""" + sparse_input = 
sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa') + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some examples do not have ids.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [] + # example 1, values [[0.], [1.]] + # example 2, [[2.]] + # example 3, values [] + # example 4, [[3.]] + # example 5, values [] + indices=((1, 0), (1, 1), (2, 0), (4, 0)), + values=(0., 1., 2., 3.), + dense_shape=(6, 2)) + expected_sequence_length = [0, 2, 1, 0, 1, 0] + numeric_column = sfc.sequence_numeric_column('aaa') + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +if __name__ == '__main__': + test.main() -- GitLab From 26cb7de9c03a9d73703decec8c917651369ee9ee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 14:25:37 -0800 Subject: [PATCH 101/311] Add a function that allows to dynamically verify whether a function is white listed for graph mode. 
PiperOrigin-RevId: 187080654 --- tensorflow/contrib/py2tf/impl/conversion.py | 18 ++++++++++++++++++ .../contrib/py2tf/impl/conversion_test.py | 11 +++++++++++ 2 files changed, 29 insertions(+) diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index 044de33568..d95469ea53 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -97,6 +97,24 @@ class ConversionMap(object): self.dependency_cache[original_entity] = converted_ast +def is_whitelisted_for_graph(o): + """Check whether an entity is whitelisted for use in graph mode. + + Examples of whitelisted entities include all members of the tensorflow + package. + + Args: + o: A Python entity. + Returns: + Boolean + """ + m = tf_inspect.getmodule(o) + for prefix, in config.DEFAULT_UNCOMPILED_MODULES: + if m.__name__.startswith(prefix): + return True + return False + + def entity_to_graph(o, conversion_map, arg_values, arg_types): """Compile a Python entity into equivalent TensorFlow. 
diff --git a/tensorflow/contrib/py2tf/impl/conversion_test.py b/tensorflow/contrib/py2tf/impl/conversion_test.py index 7816f95857..9ff256aace 100644 --- a/tensorflow/contrib/py2tf/impl/conversion_test.py +++ b/tensorflow/contrib/py2tf/impl/conversion_test.py @@ -20,12 +20,23 @@ from __future__ import print_function import gast +from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl import conversion +from tensorflow.python.framework import constant_op from tensorflow.python.platform import test class ConversionTest(test.TestCase): + def test_is_whitelisted_for_graph(self): + + def test_fn(): + return constant_op.constant(1) + + self.assertFalse(conversion.is_whitelisted_for_graph(test_fn)) + self.assertTrue(conversion.is_whitelisted_for_graph(utils)) + self.assertTrue(conversion.is_whitelisted_for_graph(constant_op.constant)) + def test_entity_to_graph_unsupported_types(self): with self.assertRaises(ValueError): conversion_map = conversion.ConversionMap(True, (), (), None) -- GitLab From f4a396bcecd8b27caba0c10a50e1f6b56dbcf6a9 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 14:31:29 -0800 Subject: [PATCH 102/311] [TF:XLA] Bump open source llvm revision to r326083 PiperOrigin-RevId: 187081592 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 278a225f76..9009f08163 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -476,11 +476,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", + 
"https://github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", ], - sha256 = "f5721d9cc18a9109c9e9f847f48e69b710b961cee83e6691227e310cb3b5da58", - strip_prefix = "llvm-fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14", + sha256 = "63d4da54dc7bc9a79e2ad266d230f4f759520cccb344a2dd49c2c6383ab75285", + strip_prefix = "llvm-8f7bcdf3c65b9a47e35653d525135beb18f3ac25", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From c1e22e9fc1b8db5390c466a2ffb5da8b1abf15b4 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 14:32:08 -0800 Subject: [PATCH 103/311] Track DebugOptions in AotCompilationOptions In particular, I need this for supporting HLO profiling in the AOT backend. PiperOrigin-RevId: 187081674 --- tensorflow/compiler/xla/service/compile_only_service.cc | 3 +-- tensorflow/compiler/xla/service/compiler.cc | 3 +++ tensorflow/compiler/xla/service/compiler.h | 6 +++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index dab73596e1..6664496ab6 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -72,8 +72,7 @@ CompileOnlyService::CompileAheadOfTime( VersionedComputationHandle versioned_handle = user_computation->GetVersionedHandle(); - // TODO(b/63773457): Track DebugOptions in AotCompilationOptions. - DebugOptions debug_options = legacy_flags::GetDebugOptionsFromFlags(); + const DebugOptions& debug_options = options.debug_options(); // Dump computation proto state if flag is set. 
const string& directory_path = debug_options.xla_dump_computations_to(); diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc index e2e9d2a0c0..0392d4af48 100644 --- a/tensorflow/compiler/xla/service/compiler.cc +++ b/tensorflow/compiler/xla/service/compiler.cc @@ -86,4 +86,7 @@ Compiler::GetPlatformCompilers() { return compilers->at(platform->id()).get(); } +AotCompilationOptions::AotCompilationOptions() + : debug_options_(legacy_flags::GetDebugOptionsFromFlags()) {} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 74fd24edf8..33e19efc72 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -79,11 +79,15 @@ class AotCompilationOptions { device_allocator_ = device_allocator; } + const DebugOptions& debug_options() const { return debug_options_; } + DebugOptions* mutable_debug_options() { return &debug_options_; } + protected: - AotCompilationOptions() = default; + AotCompilationOptions(); private: DeviceMemoryAllocator* device_allocator_ = nullptr; + DebugOptions debug_options_; }; // Abstract compiler interface that is subclassed for compilation on a -- GitLab From 3653257c729f651c787b6fa04788084191478c3e Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 14:38:31 -0800 Subject: [PATCH 104/311] Enable de/serialization of nested control flow. This is a follow-up to the previous commit (https://github.com/tensorflow/tensorflow/commit/23851760b7b099214bdd4f1b88156d7ac2bdd2a2). It adds the new proto schemas, enables the behavior for reading and writing the new protos, and adds a test for de/serializing nested while loops. There's still a bug preventing deserializing conds, which will be addressed in another change. 
PiperOrigin-RevId: 187082713 --- tensorflow/core/protobuf/control_flow.proto | 17 ++++++- tensorflow/python/ops/control_flow_ops.py | 54 ++++++-------------- tensorflow/python/training/saver_test.py | 56 +++++++++++++++++++++ 3 files changed, 88 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/protobuf/control_flow.proto b/tensorflow/core/protobuf/control_flow.proto index 2c9476a08a..3c05b4f0e2 100644 --- a/tensorflow/core/protobuf/control_flow.proto +++ b/tensorflow/core/protobuf/control_flow.proto @@ -17,6 +17,15 @@ message ValuesDef { map external_values = 2; } +// Container for any kind of control flow context. Any other control flow +// contexts that are added below should also be added here. +message ControlFlowContextDef { + oneof ctxt { + CondContextDef cond_ctxt = 1; + WhileContextDef while_ctxt = 2; + } +} + // Protocol buffer representing a CondContext object. message CondContextDef { // Name of the context. @@ -33,6 +42,9 @@ message CondContextDef { // Values and external values in control flow context. ValuesDef values_def = 5; + + // Contexts contained inside this context (e.g. nested conds). + repeated ControlFlowContextDef nested_contexts = 6; } // Protocol buffer representing a WhileContext object. @@ -70,5 +82,8 @@ message WhileContextDef { // Optional name of the maximum_iterations tensor. string maximum_iterations_name = 11; - // Next available id: 12. + // Contexts contained inside this context (e.g. nested whiles). + repeated ControlFlowContextDef nested_contexts = 12; + + // Next available id: 13. 
} diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 152578c0c6..b16901effd 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1765,13 +1765,9 @@ class CondContext(ControlFlowContext): context_def.branch = self._branch context_def.values_def.MergeFrom(super(CondContext, self)._to_values_def( export_scope)) - # TODO(b/72868227): enable this once the corresponding control_flow.proto - # changes have been checked in (they aren't checked in and this is - # disabled for now to ensure forwards compatibility). - if False: # pylint: disable=using-constant-test - for nested in self._nested_contexts: - nested_def = context_def.nested_contexts.add() - nested.to_control_flow_context_def(nested_def) + for nested in self._nested_contexts: + nested_def = context_def.nested_contexts.add() + nested.to_control_flow_context_def(nested_def) return context_def else: @@ -1783,14 +1779,10 @@ class CondContext(ControlFlowContext): ret = CondContext(context_def=context_def, import_scope=import_scope) - # TODO(b/72868227): remove "if hasattr(...)" once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is here for now to ensure forwards compatibility). - if hasattr(context_def, "nested_contexts"): - ret.Enter() - for nested_def in context_def.nested_contexts: - from_control_flow_context_def(nested_def) - ret.Exit() + ret.Enter() + for nested_def in context_def.nested_contexts: + from_control_flow_context_def(nested_def) + ret.Exit() return ret def to_control_flow_context_def(self, context_def, export_scope=None): @@ -2108,10 +2100,7 @@ def cond(pred, # Only add non-nested conds to the collection. Any nested control flow will # be encapsulated in the root context. assert context_t.outer_context == context_f.outer_context - # TODO(b/72868227): remove "if True..." 
once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if True or context_t.outer_context is None: + if context_t.outer_context is None: ops.add_to_collection(ops.GraphKeys.COND_CONTEXT, context_t) ops.add_to_collection(ops.GraphKeys.COND_CONTEXT, context_f) @@ -2334,13 +2323,9 @@ class WhileContext(ControlFlowContext): context_def.values_def.MergeFrom( super(WhileContext, self)._to_values_def( export_scope=export_scope)) - # TODO(b/72868227): remove "if True..." once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if False: # pylint: disable=using-constant-test - for nested in self._nested_contexts: - nested_def = context_def.nested_contexts.add() - nested.to_control_flow_context_def(nested_def) + for nested in self._nested_contexts: + nested_def = context_def.nested_contexts.add() + nested.to_control_flow_context_def(nested_def) return context_def else: @@ -2362,14 +2347,10 @@ class WhileContext(ControlFlowContext): """ ret = WhileContext(context_def=context_def, import_scope=import_scope) - # TODO(b/72868227): remove "if hasattr(...)" once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if hasattr(context_def, "nested_contexts"): - ret.Enter() - for nested_def in context_def.nested_contexts: - from_control_flow_context_def(nested_def, import_scope=import_scope) - ret.Exit() + ret.Enter() + for nested_def in context_def.nested_contexts: + from_control_flow_context_def(nested_def, import_scope=import_scope) + ret.Exit() return ret def GetWhileContext(self): @@ -3214,10 +3195,7 @@ def while_loop(cond, swap_memory=swap_memory) # Only add non-nested loops to the collection. 
Any nested control flow will # be encapsulated in the root context. - # TODO(b/72868227): enable condition once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if True or loop_context.outer_context is None: + if loop_context.outer_context is None: ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context) result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants) if maximum_iterations is not None: diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index f00f98db00..b366ed30f3 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -53,6 +53,7 @@ from tensorflow.python.lib.io import file_io from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import partitioned_variables @@ -2040,6 +2041,61 @@ class MetaGraphTest(test.TestCase): self._testGraphExtensionRestore(test_dir) self._testRestoreFromTrainGraphWithControlContext(test_dir) + def testNestedWhileLoops(self): + test_dir = self._get_test_dir("nested_whiles") + filename = os.path.join(test_dir, "metafile") + saver_ckpt = os.path.join(test_dir, "saver.ckpt") + + # Create two simple nested while loops. 
+ with ops_lib.Graph().as_default(): + def body(i, x): + _, r = control_flow_ops.while_loop(lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0]) + return i + 1, x + r + + var = variables.Variable(0) + var_name = var.name + + _, output = control_flow_ops.while_loop(lambda i, x: i < 5, body, + [0, var]) + output_name = output.name + + init_op = variables.global_variables_initializer() + + # Generate a MetaGraphDef containing the nested loops. + with session.Session() as sess: + sess.run(init_op) + sess.run(output) + saver = saver_module.Saver() + saver.save(sess, saver_ckpt) + saver.export_meta_graph(filename) + + # Build and run the gradients of the nested while loop. We use this below + # to verify that the gradients are correct with an imported MetaGraphDef. + grad = gradients_impl.gradients([output], [var]) + with session.Session() as sess: + sess.run(init_op) + expected_grad_value = sess.run(grad) + + # Restore the MetaGraphDef into a new Graph. + with ops_lib.Graph().as_default(): + with session.Session() as sess: + saver = saver_module.import_meta_graph(filename) + saver.restore(sess, saver_ckpt) + + # Make sure we can still build gradients and get the same result. + var = ops_lib.get_default_graph().get_tensor_by_name(var_name) + output = ops_lib.get_default_graph().get_tensor_by_name(output_name) + grad = gradients_impl.gradients([output], [var]) + + init_op = variables.global_variables_initializer() + + with session.Session() as sess: + sess.run(init_op) + actual_grad_value = sess.run(grad) + self.assertEqual(expected_grad_value, actual_grad_value) + def testStrippedOpListDef(self): with self.test_session(): # Creates a graph. -- GitLab From 854a07650f33be545441a08f5db84a0f05a8b88e Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 15:37:27 -0800 Subject: [PATCH 105/311] [XLA::Interpreter] Add support for kCall to HloEvaluator. Also enable xla/tests/call_test to run on interpreter. 
PiperOrigin-RevId: 187092587 --- .../compiler/xla/service/hlo_evaluator.cc | 20 +++++++++++++++++++ .../compiler/xla/service/hlo_evaluator.h | 2 ++ tensorflow/compiler/xla/tests/BUILD | 3 +++ 3 files changed, 25 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 15ae53128a..fd06b19144 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -2445,6 +2445,26 @@ Status HloEvaluator::HandleCopy(HloInstruction* copy) { return Status::OK(); } +Status HloEvaluator::HandleCall(HloInstruction* call) { + auto* computation = call->to_apply(); + auto operands = call->operands(); + + std::vector arg_literals; + arg_literals.reserve(operands.size()); + for (auto operand : operands) { + const Literal& arg_literal = GetEvaluatedLiteralFor(operand); + arg_literals.push_back(&arg_literal); + } + + HloEvaluator embedded_evaluator; + std::unique_ptr result = + embedded_evaluator.Evaluate(*computation, arg_literals) + .ConsumeValueOrDie(); + + evaluated_[call] = std::move(result); + return Status::OK(); +} + Status HloEvaluator::Preprocess(HloInstruction* hlo) { VLOG(2) << "About to visit HLO: " << hlo->ToString(); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 3b2b697e49..c65d9915e3 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -153,6 +153,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCopy(HloInstruction* copy) override; + Status HandleCall(HloInstruction* call) override; + private: // Returns the already-evaluated literal result for the instruction. 
// A Constant instruction is considered evaluated and its literal will be diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 97abf217d7..33fde9737d 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1143,6 +1143,9 @@ xla_test( xla_test( name = "call_test", srcs = ["call_test.cc"], + tags = [ + "enable_for_xla_interpreter", + ], deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", -- GitLab From acf78b20f71dd8c3a928b1f12ea4de6f5028fc48 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 15:37:40 -0800 Subject: [PATCH 106/311] Uses a thread pool for graph functions in eager mode with inter_op_parallelism_threads. PiperOrigin-RevId: 187092622 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/c_api.cc | 4 ++-- tensorflow/c/eager/c_api_internal.h | 14 +++++++++++++- tensorflow/c/eager/runtime.cc | 14 ++++++++++---- tensorflow/c/eager/runtime.h | 3 +++ tensorflow/c/eager/runtime_test.cc | 12 ++++++------ 6 files changed, 35 insertions(+), 13 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index e55cb672e9..16a2a15072 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -21,6 +21,7 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ + "//tensorflow/core:lib", "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index bebb63c746..b233dd5b93 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -818,8 +818,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // See WARNING comment below - would be nice to rework to avoid this // subtlety. 
tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = - tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); + status->status = tensorflow::KernelAndDevice::Init( + ndef, ctx->func_lib(device), &ctx->runner, kernel); if (!status->status.ok()) { delete kernel; return; diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 3356054cd0..29944df4c2 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/platform/mutex.h" @@ -45,7 +46,15 @@ struct TFE_ContextOptions { struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) - : policy(opts.policy), + : thread_pool(new tensorflow::thread::ThreadPool( + opts.session_options.options.env, "EagerCompute", + opts.session_options.options.config + .inter_op_parallelism_threads() != 0 + ? 
opts.session_options.options.config + .inter_op_parallelism_threads() + : tensorflow::port::NumSchedulableCPUs())), + runner([this](std::function f) { thread_pool->Schedule(f); }), + policy(opts.policy), session(s), rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( @@ -54,6 +63,9 @@ struct TFE_Context { log_device_placement( opts.session_options.options.config.log_device_placement()) {} + const std::unique_ptr thread_pool; + std::function)> runner; + const TFE_ContextDevicePlacementPolicy policy; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index 4bf24fec2c..b9618420f0 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -255,17 +255,22 @@ Status KernelAndDevice::InitOp(Device* device, const NodeDef& ndef, out->device_ = device; out->kernel_.reset(k); out->flib_ = nullptr; + out->runner_ = nullptr; + out->default_runner_ = [](std::function f) { f(); }; return s; } // static Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, + std::function)>* runner, KernelAndDevice* out) { OpKernel* k = nullptr; Status s = flib->CreateKernel(ndef, &k); out->device_ = flib->device(); out->kernel_.reset(k); out->flib_ = flib; + out->runner_ = runner; + out->default_runner_ = [](std::function f) { f(); }; return s; } @@ -296,10 +301,11 @@ Status KernelAndDevice::Run(std::vector* input_tensors, if (stats != nullptr) { params.track_allocations = true; } - // TODO(apassos): use a thread pool. 
- std::function)> runner = - [](std::function f) { f(); }; - params.runner = &runner; + if (runner_ == nullptr) { + params.runner = &default_runner_; + } else { + params.runner = runner_; + } OpKernelContext context(¶ms); device_->Compute(kernel_.get(), &context); diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 7fede4dae9..fa5f839977 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -169,6 +169,7 @@ class KernelAndDevice { // the FunctionLibraryRuntime is pushed on to the caller (see locking in // c_api.cc). static Status Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, + std::function)>* runner, KernelAndDevice* out); // TODO(ashankar): Remove this static Status InitOp(Device* device, const NodeDef& ndef, @@ -188,6 +189,8 @@ class KernelAndDevice { private: std::unique_ptr kernel_; Device* device_; + std::function)>* runner_; + std::function)> default_runner_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc index 643153058c..ab0b535e1a 100644 --- a/tensorflow/c/eager/runtime_test.cc +++ b/tensorflow/c/eager/runtime_test.cc @@ -92,8 +92,8 @@ TEST(KernelAndDevice, Run) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - Status s = - KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel); + Status s = KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &kernel); ASSERT_TRUE(s.ok()) << s; std::vector outputs; s = kernel.Run(&inputs, &outputs, nullptr); @@ -158,8 +158,8 @@ void BM_KernelAndDeviceInit(int iters) { KernelAndDevice k(nullptr); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { - TF_CHECK_OK( - KernelAndDevice::Init(ndef, env.function_library_runtime(), &k)); + TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &k)); } } 
BENCHMARK(BM_KernelAndDeviceInit); @@ -179,8 +179,8 @@ void BM_KernelAndDeviceRun(int iters) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - TF_CHECK_OK( - KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel)); + TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &kernel)); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr)); -- GitLab From 260f5b8fe144cd369fde755739806449a2901252 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Mon, 26 Feb 2018 15:42:52 -0800 Subject: [PATCH 107/311] [XLA] Fix #17090 a problem in IrArray::Index::SourceIndexOfTranspose. Algebraic simplification transforms bitcast-equivalent transpose/reshape instructions to bitcast instructions before IR emission. As such, we should skip the checking on whether a transpose/reshape instruction is bitcast-equivalent or not during IR emission. Remove the call from IrArray::Index::SourceIndexOfTranspose to ShapeUtil::TransposeIsBitcast. Also remove the call from IrArray::Index::SourceIndexOfReshape to ShapeUtil::ReshapeIsBitcast. Remove the calls to ShapeUtil::TransposeIsBitcast and ShapeUtil::ReshapeIsBitcast from NotWorthHoistingIndividually because layout assignment hasn't been done there yet. Instead, returns true when the input is a transpose or reshape instruction, to prevent it from being hoisted out of loops. Add a check to ShapeUtil::TransposeIsBitcast and ShapeUtil::ReshapeIsBitcast to make sure that both input shape and output shape have layouts. Add two test cases.
PiperOrigin-RevId: 187093399 --- .../xla/service/layout_assignment_test.cc | 79 +++++++++++++++++++ .../compiler/xla/service/llvm_ir/ir_array.cc | 8 +- .../while_loop_invariant_code_motion.cc | 12 +-- tensorflow/compiler/xla/shape_util.cc | 14 +--- tensorflow/compiler/xla/shape_util.h | 4 + 5 files changed, 95 insertions(+), 22 deletions(-) diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 88e5caaf47..62feb7c1e9 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -590,6 +590,85 @@ TEST_F(LayoutAssignmentTest, TransposeToBitcastToUser) { transpose->shape(), {2, 3, 0, 1})); } +// TransposeIsBitcast shouldn't be called without layout information. +TEST_F(LayoutAssignmentTest, TransposeIsBitcastFail) { + auto builder = HloComputation::Builder(TestName()); + Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2}); + Shape input_shape_with_layout(input_shape); + *input_shape_with_layout.mutable_layout() = LayoutUtil::MakeLayout({2, 1, 0}); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_shape_with_layout, "param")); + auto hlo = builder.AddInstruction( + HloInstruction::CreateTranspose(input_shape, param, {0, 2, 1})); + // Clear the default layout assigned to the instruction. + LayoutUtil::ClearLayout(hlo->mutable_shape()); + EXPECT_DEATH(ShapeUtil::TransposeIsBitcast(hlo->operand(0)->shape(), + hlo->shape(), hlo->dimensions()), + "LayoutUtil::HasLayout"); +} + +// ReshapeIsBitcast shouldn't be called without layout information. 
+TEST_F(LayoutAssignmentTest, ReshapeIsBitcastFail) { + auto builder = HloComputation::Builder(TestName()); + Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2}); + Shape input_shape_with_layout(input_shape); + *input_shape_with_layout.mutable_layout() = LayoutUtil::MakeLayout({2, 1, 0}); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_shape_with_layout, "param")); + auto hlo = + builder.AddInstruction(HloInstruction::CreateReshape(input_shape, param)); + // Clear the default layout assigned to the instruction. + LayoutUtil::ClearLayout(hlo->mutable_shape()); + EXPECT_DEATH( + ShapeUtil::ReshapeIsBitcast(hlo->operand(0)->shape(), hlo->shape()), + "LayoutUtil::HasLayout"); +} + +// Check that the computation below doesn't crash the compiler. +// +// Within a fusion computation, only the parameters and result get assigned a +// layout. When we run the algebraic simplifier on this computation post layout +// assignment, it should not call TransposeIsBitcast on the `transpose` node +// inside the fusion computation as TransposeIsBitcast checks both input_shape +// and output_shape have layouts. 
+TEST_F(LayoutAssignmentTest, TransposeWithinFusionDoesNotCrash) { + const char* module_str = R"( + HloModule test_module + + fused_computation { + param_1 = f32[2,2,2]{2,1,0} parameter(1) + transpose = f32[2,2,2]{2,1,0} transpose(param_1), dimensions={0,2,1} + reduce_1 = f32[] parameter(0) + broadcast_1 = f32[2,2,2]{2,1,0} broadcast(reduce_1), dimensions={} + ROOT divide_1 = f32[2,2,2]{2,1,0} divide(transpose, broadcast_1) + } + + ENTRY entry_computation { + fusion.1 = f32[2,2,2]{2,1,0} parameter(1) + reduce.1 = f32[] parameter(0) + fusion.2 = f32[2,2,2]{2,1,0} fusion(reduce.1, fusion.1), kind=kLoop, calls=fused_computation + ROOT tuple.1 = (f32[2,2,2]{2,1,0}) tuple(fusion.2) + } + )"; + + auto module = tools::Parse(module_str).ValueOrDie(); + + module = + backend() + .compiler() + ->RunHloPasses(std::move(module), backend().default_stream_executor(), + /*device_allocator=*/nullptr) + .ConsumeValueOrDie(); + + EXPECT_EQ( + ::tensorflow::Status::OK(), + backend() + .compiler() + ->RunBackend(std::move(module), backend().default_stream_executor(), + /*device_allocator=*/nullptr) + .status()); +} + // A GTE inside of a fusion node inherits the layout of its operand (which // should, if we keep following operands, eventually be a parameter). 
TEST_F(LayoutAssignmentTest, GTEInheritsLayoutFromOperand) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index 6384c7f46f..f3642cf0a1 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -160,7 +160,8 @@ IrArray::Index IrArray::Index::SourceIndexOfReshape( } } - if (linear() != nullptr && + if (linear() != nullptr && LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape) && ShapeUtil::ReshapeIsBitcast(input_shape, output_shape)) { return Index(source_multidim_index, linear(), input_shape); } @@ -195,10 +196,13 @@ IrArray::Index IrArray::Index::SourceIndexOfTranspose( llvm::IRBuilder<>* builder) const { std::vector operand_multidim_index = Permute(dimension_mapping, multidim()); - if (linear() != nullptr && + + if (linear() != nullptr && LayoutUtil::HasLayout(operand_shape) && + LayoutUtil::HasLayout(shape) && ShapeUtil::TransposeIsBitcast(operand_shape, shape, dimension_mapping)) { return Index(operand_multidim_index, linear(), operand_shape); } + return Index(operand_multidim_index); } diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index a5f9b01f01..3ef0cdff67 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -106,20 +106,12 @@ static bool NotWorthHoistingIndividually(const HloInstruction& instruction) { case HloOpcode::kBitcast: case HloOpcode::kBroadcast: case HloOpcode::kConstant: + case HloOpcode::kReshape: case HloOpcode::kReverse: case HloOpcode::kSlice: + case HloOpcode::kTranspose: case HloOpcode::kTuple: return true; - - case HloOpcode::kTranspose: - return ShapeUtil::TransposeIsBitcast( - /*input_shape=*/instruction.operand(0)->shape(), - /*output_shape=*/instruction.shape(), 
instruction.dimensions()); - - case HloOpcode::kReshape: - return ShapeUtil::ReshapeIsBitcast( - /*input_shape=*/instruction.operand(0)->shape(), - /*output_shape=*/instruction.shape()); } } diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 604e0173e7..3152789016 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -1073,11 +1073,8 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ bool ShapeUtil::TransposeIsBitcast( const Shape& input_shape, const Shape& output_shape, tensorflow::gtl::ArraySlice dimension_mapping) { - // Can't insert bitcasts without layout information. - if (!LayoutUtil::HasLayout(input_shape) && - !LayoutUtil::HasLayout(output_shape)) { - return false; - } + CHECK(LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape)); // Padding is not handled. if (LayoutUtil::IsPadded(input_shape) && LayoutUtil::IsPadded(output_shape)) { @@ -1106,11 +1103,8 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ bool ShapeUtil::ReshapeIsBitcast(const Shape& input_shape, const Shape& output_shape) { - // Can't convert reshapes into bitcasts without layout information. - if (!LayoutUtil::HasLayout(input_shape) || - !LayoutUtil::HasLayout(output_shape)) { - return false; - } + CHECK(LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape)); // Padding is not handled. 
if (LayoutUtil::IsPadded(input_shape) || LayoutUtil::IsPadded(output_shape)) { diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 19b1aa93bd..8ee263fe5e 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -522,12 +522,16 @@ class ShapeUtil { // Returns whether a transpose from input_shape to output_shape with dimension // mapping "dimension_mapping" produces a result which is bit-wise identical // to its input and thus may be replaced with a bitcast. + // + // Precondition: Both input_shape and output_shape have explicit layouts. static bool TransposeIsBitcast( const Shape& input_shape, const Shape& output_shape, tensorflow::gtl::ArraySlice dimension_mapping); // Returns whether a reshape from "input_shape" to "output_shape" is a // bitcast. + // + // Precondition: Both input_shape and output_shape have explicit layouts. static bool ReshapeIsBitcast(const Shape& input_shape, const Shape& output_shape); -- GitLab From 6db1b213458ea7f0acd4476f70d930e15af8f35f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 16:01:04 -0800 Subject: [PATCH 108/311] [XLA] Add more supported dtypes to the local Python client. PiperOrigin-RevId: 187096144 --- tensorflow/compiler/xla/python/xla_client.py | 38 ++++++++++++-------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 3b8ec851d5..90cda42f32 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -30,9 +30,9 @@ from tensorflow.compiler.xla import xla_data_pb2 from tensorflow.compiler.xla.python import pywrap_xla as c_api -# Most functions are snake_case for consistency with other modules, -# whereas method names of ComputationBuilder and LocalComputation are -# CamelCase for consistency with XLA. 
+# Most functions are snake_case for consistency with other modules, whereas +# method names of ComputationBuilder and LocalComputation are CamelCase for +# consistency with XLA. # pylint: disable=invalid-name @@ -123,24 +123,34 @@ _BINARY_OPS = [ 'Pow', ] + XLA_ELEMENT_TYPE_TO_DTYPE = { - xla_data_pb2.F32: np.dtype(np.float32), - xla_data_pb2.F64: np.dtype(np.float64), - xla_data_pb2.S32: np.dtype(np.int32), - xla_data_pb2.S64: np.dtype(np.int64), - xla_data_pb2.U32: np.dtype(np.uint32), - xla_data_pb2.U64: np.dtype(np.uint64), - xla_data_pb2.PRED: np.dtype(np.bool), + xla_data_pb2.PRED: np.dtype('bool'), + xla_data_pb2.S8: np.dtype('int8'), + xla_data_pb2.S16: np.dtype('int16'), + xla_data_pb2.S32: np.dtype('int32'), + xla_data_pb2.S64: np.dtype('int64'), + xla_data_pb2.U8: np.dtype('uint8'), + xla_data_pb2.U16: np.dtype('uint16'), + xla_data_pb2.U32: np.dtype('uint32'), + xla_data_pb2.U64: np.dtype('uint64'), + xla_data_pb2.F16: np.dtype('float16'), + xla_data_pb2.F32: np.dtype('float32'), + xla_data_pb2.F64: np.dtype('float64'), + xla_data_pb2.C64: np.dtype('complex64'), xla_data_pb2.TUPLE: np.dtype(np.object), } # Note the conversion on the key. Numpy has a known issue wherein dtype hashing # doesn't work as expected (https://github.com/numpy/numpy/issues/7242). Thus, # when keying by dtype in this dict, we use the string form of dtypes. -DTYPE_TO_XLA_ELEMENT_TYPE = { - str(v): k - for k, v in XLA_ELEMENT_TYPE_TO_DTYPE.items() -} +DTYPE_TO_XLA_ELEMENT_TYPE = {str(dt): et + for et, dt in XLA_ELEMENT_TYPE_TO_DTYPE.items()} + + +def dtype_to_etype(dtype): + """Convenience function for reading DTYPE_TO_XLA_ELEMENT_TYPE.""" + return DTYPE_TO_XLA_ELEMENT_TYPE[str(np.dtype(dtype))] class LocalBuffer(object): -- GitLab From c7caa2d87daa37b66811ac99f997ad02acd4ecc8 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Mon, 26 Feb 2018 16:23:46 -0800 Subject: [PATCH 109/311] Deprecate tf.contrib.learn. RELNOTES: Deprecated tf.contrib.learn. 
Please check contrib/learn/README.md for instructions on how to convert existing code. PiperOrigin-RevId: 187099439 --- .../python/framework/experimental_test.py | 1 - tensorflow/contrib/learn/README.md | 143 ++++++++++++++++++ tensorflow/contrib/learn/__init__.py | 7 +- tensorflow/contrib/learn/python/__init__.py | 7 +- .../contrib/learn/python/learn/__init__.py | 7 +- .../python/learn/basic_session_run_hooks.py | 43 +++++- .../learn/python/learn/datasets/__init__.py | 12 +- .../learn/python/learn/datasets/base.py | 26 +++- .../learn/python/learn/datasets/mnist.py | 23 ++- .../learn/datasets/produce_small_datasets.py | 7 +- .../learn/python/learn/datasets/synthetic.py | 10 +- .../python/learn/datasets/text_datasets.py | 10 +- .../learn/python/learn/estimators/__init__.py | 7 +- .../learn/python/learn/estimators/_sklearn.py | 4 +- .../learn/estimators/composable_model.py | 17 ++- .../python/learn/estimators/constants.py | 8 +- .../learn/python/learn/estimators/debug.py | 14 +- .../learn/python/learn/estimators/dnn.py | 19 ++- .../learn/estimators/dnn_linear_combined.py | 19 ++- .../learn/estimators/dynamic_rnn_estimator.py | 13 +- .../python/learn/estimators/estimator.py | 27 +++- .../learn/estimators/estimator_test_utils.py | 7 +- .../learn/python/learn/estimators/head.py | 20 ++- .../learn/python/learn/estimators/kmeans.py | 9 +- .../learn/python/learn/estimators/linear.py | 19 ++- .../learn/estimators/logistic_regressor.py | 10 +- .../python/learn/estimators/metric_key.py | 10 +- .../learn/python/learn/estimators/model_fn.py | 22 ++- .../python/learn/estimators/prediction_key.py | 8 +- .../python/learn/estimators/rnn_common.py | 7 +- .../python/learn/estimators/run_config.py | 19 ++- .../estimators/state_saving_rnn_estimator.py | 13 +- .../learn/python/learn/estimators/svm.py | 11 +- .../learn/estimators/tensor_signature.py | 11 +- .../python/learn/estimators/test_data.py | 7 +- .../contrib/learn/python/learn/evaluable.py | 11 +- 
.../contrib/learn/python/learn/experiment.py | 24 +-- .../learn/python/learn/export_strategy.py | 14 +- .../learn/python/learn/graph_actions.py | 8 +- .../learn/python/learn/learn_io/__init__.py | 7 +- .../learn/python/learn/learn_io/dask_io.py | 11 +- .../python/learn/learn_io/data_feeder.py | 29 +++- .../python/learn/learn_io/generator_io.py | 9 +- .../learn/python/learn/learn_io/graph_io.py | 16 +- .../learn/python/learn/learn_io/numpy_io.py | 9 +- .../learn/python/learn/learn_io/pandas_io.py | 12 +- .../learn/python/learn/learn_runner.py | 10 +- .../learn/python/learn/learn_runner_lib.py | 6 +- .../contrib/learn/python/learn/metric_spec.py | 13 +- .../contrib/learn/python/learn/models.py | 14 +- .../learn/python/learn/monitored_session.py | 6 +- .../contrib/learn/python/learn/monitors.py | 68 ++++++++- .../learn/python/learn/ops/__init__.py | 7 +- .../learn/python/learn/ops/embeddings_ops.py | 6 +- .../learn/python/learn/ops/losses_ops.py | 7 +- .../learn/python/learn/ops/seq2seq_ops.py | 12 +- .../python/learn/preprocessing/__init__.py | 7 +- .../python/learn/preprocessing/categorical.py | 15 +- .../preprocessing/categorical_vocabulary.py | 13 +- .../learn/python/learn/preprocessing/text.py | 26 +++- .../learn/python/learn/session_run_hook.py | 6 +- .../python/learn/summary_writer_cache.py | 5 +- .../contrib/learn/python/learn/trainable.py | 9 +- .../learn/python/learn/utils/__init__.py | 7 +- .../learn/python/learn/utils/export.py | 9 +- .../contrib/learn/python/learn/utils/gc.py | 13 +- .../python/learn/utils/input_fn_utils.py | 16 +- .../python/learn/utils/inspect_checkpoint.py | 2 +- .../learn/utils/saved_model_export_utils.py | 30 +++- tensorflow/python/util/decorator_utils.py | 2 +- 70 files changed, 945 insertions(+), 111 deletions(-) create mode 100644 tensorflow/contrib/learn/README.md diff --git a/tensorflow/contrib/framework/python/framework/experimental_test.py b/tensorflow/contrib/framework/python/framework/experimental_test.py index 
8e54e09e04..cfdc7df7d8 100644 --- a/tensorflow/contrib/framework/python/framework/experimental_test.py +++ b/tensorflow/contrib/framework/python/framework/experimental_test.py @@ -49,7 +49,6 @@ class ExperimentalTest(test.TestCase): "\nTHIS FUNCTION IS EXPERIMENTAL. It may change or " "be removed at any time, and without warning." "\n" - "\n" "\nArgs:" "\n arg0: Arg 0." "\n arg1: Arg 1." diff --git a/tensorflow/contrib/learn/README.md b/tensorflow/contrib/learn/README.md new file mode 100644 index 0000000000..d516bffc5e --- /dev/null +++ b/tensorflow/contrib/learn/README.md @@ -0,0 +1,143 @@ +EVERYTHING IN THIS DIRECTORY IS DEPRECATED. + +Using functions or classes will result in warnings. + +Instructions for converting to current alternatives are included in the +warnings. A high-level overview is below. + +## Canned Estimators + +Many canned estimators (subclasses of `Estimator`) have equivalents in core: +`DNNClassifier`, `DNNRegressor`, `DNNEstimator`, `LinearClassifier`, +`LinearRegressor`, `DNNLinearCombinedClassifier` and +`DNNLinearCombinedRegressor`. They are exposed under `tf.estimator`. +`DNNEstimator`, `LinearEstimator` and `DNNLinearCombinedEstimator` +are exposed under `tf.contrib.estimator`. + +To migrate to the new api, users need to take the following steps: + +* Replace `tf.contrib.learn` with `tf.estimator`. +* If you subclass any of the estimators, stop doing that. You should be able to + write a factory method that returns a canned estimator instead. If this is not + possible (if you override methods from the canned estimator), consider writing + a custom estimator instead. See `tf.estimator.Estimator`. +* Set `loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE` to preserve loss + reduction as the average over batch. +* Some optimizer-related arguments are no longer passed in the estimator + constructor. Instead, we provide methods that perform the same job by wrapping + an optimizer. 
Specifically: + * `gradient_clip_norm`: Use `tf.contrib.estimator.clip_gradients_by_norm` + * `embedding_lr_multipliers`: Not supported. + Other arguments: + * `input_layer_min_slice_size`: Replaced by `input_layer_partitioner` + * `enable_centered_bias`: Not supported. Dropping this argument is unlikely to + harm your model. + * `feature_engineering_fn`: Not supported. You can call your + `feature_engineering_fn` inside your input_fn: + ```python + def new_input_fn(): + features, labels = old_input_fn() + return feature_engineering_fn(features, labels) + ``` +* Use `tf.reshape` to reshape labels in your `input_fn`. `tf.estimator` + classifiers and regressors expect labels as a 2D Tensor of shape + `[batch_size, 1]`, or `[batch_size, n_labels]`. In contrast, + `tf.contrib.learn` classifiers and regressors supported labels with shape + `[batch_size]`. +* If you pass custom metrics from the `evaluate()` method call, use + `tf.contrib.estimator.add_metrics`. +* Replace your `serving_input_fn` with a `serving_input_receiver_fn`. + Note this should be entirely distinct from your training `input_fn`, so if you + previously had one `input_fn` with different "modes", you should now factor + that apart. Where the former returned either a simple `(features, labels)` + tuple or `InputFnOps`, you should now return a `ServingInputReceiver`. + If you were generating your `serving_input_fn` using the + `build_parsing_serving_input_fn` helper, you can simply drop in the + replacement `build_parsing_serving_input_receiver_fn`. + +Some remaining estimators/classes: + +* `DynamicRnnEstimator`: Consider a custom `model_fn`. +* `KMeansClustering`: Use `tf.contrib.factorization.KMeansClustering`. +* `LogisticRegressor`: Not supported. Instead, use `binary_classification_head` + with a custom `model_fn`, or with `DNNEstimator`. +* `StateSavingRnnEstimator`: Consider a custom `model_fn`. +* SVM: Consider a custom `model_fn`. 
+* `LinearComposableModel` and `DNNComposableModel`: Not supported. + Consider `tf.contrib.estimator.DNNEstimator`, or write a custom model_fn. +* `MetricSpec`: Deprecated. For adding custom metrics to canned Estimators, use + `tf.contrib.estimator.add_metrics`. + +## Estimator +`tf.contrib.learn.Estimator` is migrated to `tf.estimator.Estimator`. + +To migrate, users need to take the following steps: + +* Replace `tf.contrib.learn.Estimator` with `tf.estimator.Estimator`. +* If you pass a `config` argument to `Estimator`, this must be + `tf.estimator.RunConfig`. You may need to edit your code accordingly. +* Edit your `model_fn` to return `tf.estimator.EstimatorSpec`. Refer to + `EstimatorSpec` for documentation of specific fields. +* If your `model_fn` uses the `mode` argument, use `tf.estimator.ModeKeys`. + +Some related classes: +* `Evaluable`, `Trainable`: Not supported, merged into `tf.estimator.Estimator`. +* ExportStrategy: Replaced by `tf.estimator.Exporter`. + +## Head/MultiHead +These classes are now supported under `tf.contrib.estimator`, e.g. +`tf.contrib.estimator.multi_class_head` and `tf.contrib.estimator.multi_head`. + +Some differences: + +* `multi_class_head`: If you use `tf.contrib.learn.multi_class_head` with + `n_classes=2`, switch to `tf.contrib.estimator.binary_classification_head`. +* `loss_only_head`: Not supported. +* `poisson_regression_head`: Not supported (yet). +* `binary_svm_head`: Not supported (yet). +* `no_op_train_fn`: Replace it with `tf.no_op`. + +Some arguments are renamed, please refer to documentation. In addition: + +* `loss_fn`: Supported for `multi_label_head`. If you need it for other heads, + please open an issue. +* `metric_class_ids`: Not supported (yet). +* `enable_centered_bias`: Not supported. Dropping this argument is unlikely to + harm your model. +* `label_name`: Not needed in `tf.estimator`. If you don’t use `multi_head`, + drop this argument. 
If you use `multi_head`, refer to + `tf.contrib.estimator.multi_head` documentation. + +## Experiment Class - Distributed Training Tooling + +Switch to `tf.estimator.train_and_evaluate`. Some differences: + +* Most of the constructor arguments, like `train_input_fn`, `eval_input_fn`, + should be wrapped into `tf.estimator.TrainSpec` and `tf.estimator.EvalSpec`. +* Remove the `experiment_fn`. Instead, create the `Estimator`, + `train_spec` and `eval_spec`, then call `tf.estimator.train_and_evaluate` + directly. +* Inside `tf.estimator.EvalSpec`, the `exporter` field is the replacement + for `export_strategy`. To be precise, `tf.estimator.LatestExporter` is the + replacement for `tf.contrib.learn.make_export_strategy`. If you want to export + only at the end of training use `tf.estimator.FinalExporter`. +* If the `TF_CONFIG` environment variable is constructed manually, please read + the `train_and_evaluate` documentation for the new requirements (in + particular, the chief node and evaluator node). + +## Other Classes and Functions + +* `tf.contrib.learn.datasets` is deprecated. We are adding ready-to-use datasets + to tensorflow/models. Many smaller datasets are available from other sources, + such as scikits.learn. Some Python processing may have to be written, but this + is straightforward to implement using the standard modules. +* `tf.contrib.learn.preprocessing`: Deprecated. The python-only preprocessing + functions are not a good fit for TensorFlow. Please use `tf.data`, and + consider tensorflow/transform for more complex use cases. +* `tf.contrib.learn.models`: Not supported, use canned estimators instead. +* `tf.contrib.learn.monitors`: Implement `SessionRunHook` instead. Hook + implementations are in `tf.train`. +* `tf.contrib.learn.learn_io`: Use the methods in `tf.estimator.inputs`, such as + `tf.estimator.inputs.numpy_input_fn`. Some utility functions have no + equivalent, we encourage the use of `tf.data`.
+ diff --git a/tensorflow/contrib/learn/__init__.py b/tensorflow/contrib/learn/__init__.py index 3698af027e..79bd73faaf 100644 --- a/tensorflow/contrib/learn/__init__.py +++ b/tensorflow/contrib/learn/__init__.py @@ -13,8 +13,11 @@ # limitations under the License. # ============================================================================== -# TODO(ptucker,ipolosukhin): Improve descriptions. -"""High level API for learning. +"""High level API for learning (DEPRECATED). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. See the @{$python/contrib.learn} guide. diff --git a/tensorflow/contrib/learn/python/__init__.py b/tensorflow/contrib/learn/python/__init__.py index bbebd5ab97..df23aeb2c4 100644 --- a/tensorflow/contrib/learn/python/__init__.py +++ b/tensorflow/contrib/learn/python/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level API for learning with TensorFlow.""" +"""High level API for learning with TensorFlow (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/__init__.py b/tensorflow/contrib/learn/python/learn/__init__.py index cdc67c77d5..76e0e8ac8f 100644 --- a/tensorflow/contrib/learn/python/learn/__init__.py +++ b/tensorflow/contrib/learn/python/learn/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level API for learning with TensorFlow.""" +"""High level API for learning with TensorFlow (deprecated). 
+ +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py b/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py index 2284ec46e9..fed1c44d19 100644 --- a/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py +++ b/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py @@ -12,20 +12,47 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Some common SessionRunHook classes.""" +"""Some common SessionRunHook classes (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions.
+""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.training import basic_session_run_hooks +from tensorflow.python.util.deprecation import deprecated_alias # pylint: disable=invalid-name -LoggingTensorHook = basic_session_run_hooks.LoggingTensorHook -StopAtStepHook = basic_session_run_hooks.StopAtStepHook -CheckpointSaverHook = basic_session_run_hooks.CheckpointSaverHook -StepCounterHook = basic_session_run_hooks.StepCounterHook -NanLossDuringTrainingError = basic_session_run_hooks.NanLossDuringTrainingError -NanTensorHook = basic_session_run_hooks.NanTensorHook -SummarySaverHook = basic_session_run_hooks.SummarySaverHook +LoggingTensorHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.LoggingTensorHook', + 'tf.train.LoggingTensorHook', + basic_session_run_hooks.LoggingTensorHook) +StopAtStepHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.StopAtStepHook', + 'tf.train.StopAtStepHook', + basic_session_run_hooks.StopAtStepHook) +CheckpointSaverHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.CheckpointSaverHook', + 'tf.train.CheckpointSaverHook', + basic_session_run_hooks.CheckpointSaverHook) +StepCounterHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.StepCounterHook', + 'tf.train.StepCounterHook', + basic_session_run_hooks.StepCounterHook) +NanLossDuringTrainingError = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.NanLossDuringTrainingError', + 'tf.train.NanLossDuringTrainingError', + basic_session_run_hooks.NanLossDuringTrainingError) +NanTensorHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.NanTensorHook', + 'tf.train.NanTensorHook', + basic_session_run_hooks.NanTensorHook) +SummarySaverHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.SummarySaverHook', + 'tf.train.SummarySaverHook', + basic_session_run_hooks.SummarySaverHook) # pylint: 
enable=invalid-name diff --git a/tensorflow/contrib/learn/python/learn/datasets/__init__.py b/tensorflow/contrib/learn/python/learn/datasets/__init__.py index 7240b0de14..3c34712ac8 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/__init__.py +++ b/tensorflow/contrib/learn/python/learn/datasets/__init__.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Dataset utilities and synthetic/reference datasets.""" +"""Dataset utilities and synthetic/reference datasets (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -27,6 +32,7 @@ from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.contrib.learn.python.learn.datasets import mnist from tensorflow.contrib.learn.python.learn.datasets import synthetic from tensorflow.contrib.learn.python.learn.datasets import text_datasets +from tensorflow.python.util.deprecation import deprecated # Export load_iris and load_boston. load_iris = base.load_iris @@ -51,6 +57,7 @@ SYNTHETIC = { } +@deprecated(None, 'Please use tf.data.') def load_dataset(name, size='small', test_with_fake_data=False): """Loads dataset by name. @@ -73,8 +80,9 @@ def load_dataset(name, size='small', test_with_fake_data=False): return DATASETS[name]() +@deprecated(None, 'Please use tf.data.') def make_dataset(name, n_samples=100, noise=None, seed=42, *args, **kwargs): - """Creates binary synthetic datasets + """Creates binary synthetic datasets. 
Args: name: str, name of the dataset to generate diff --git a/tensorflow/contrib/learn/python/learn/datasets/base.py b/tensorflow/contrib/learn/python/learn/datasets/base.py index ca720ae5ed..3b5c9b97c0 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/base.py +++ b/tensorflow/contrib/learn/python/learn/datasets/base.py @@ -12,7 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Base utilities for loading datasets.""" + +"""Base utilities for loading datasets (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -29,11 +35,14 @@ import numpy as np from six.moves import urllib from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated + Dataset = collections.namedtuple('Dataset', ['data', 'target']) Datasets = collections.namedtuple('Datasets', ['train', 'validation', 'test']) +@deprecated(None, 'Use tf.data instead.') def load_csv_with_header(filename, target_dtype, features_dtype, @@ -53,6 +62,7 @@ def load_csv_with_header(filename, return Dataset(data=data, target=target) +@deprecated(None, 'Use tf.data instead.') def load_csv_without_header(filename, target_dtype, features_dtype, @@ -70,6 +80,7 @@ def load_csv_without_header(filename, return Dataset(data=data, target=target) +@deprecated(None, 'Use tf.data instead.') def shrink_csv(filename, ratio): """Create a smaller dataset of only 1/ratio of original data.""" filename_small = filename.replace('.', '_small.') @@ -84,6 +95,7 @@ def shrink_csv(filename, ratio): i += 1 +@deprecated(None, 'Use scikits.learn.datasets.') def load_iris(data_path=None): """Load Iris dataset. 
@@ -100,6 +112,7 @@ def load_iris(data_path=None): data_path, target_dtype=np.int, features_dtype=np.float) +@deprecated(None, 'Use scikits.learn.datasets.') def load_boston(data_path=None): """Load Boston housing dataset. @@ -116,7 +129,12 @@ def load_boston(data_path=None): data_path, target_dtype=np.float, features_dtype=np.float) -def retry(initial_delay, max_delay, factor=2.0, jitter=0.25, is_retriable=None): +@deprecated(None, 'Use the retry module or similar alternatives.') +def retry(initial_delay, + max_delay, + factor=2.0, + jitter=0.25, + is_retriable=None): """Simple decorator for wrapping retriable functions. Args: @@ -152,7 +170,7 @@ def retry(initial_delay, max_delay, factor=2.0, jitter=0.25, is_retriable=None): for delay in delays(): try: return fn(*args, **kwargs) - except Exception as e: # pylint: disable=broad-except) + except Exception as e: # pylint: disable=broad-except if is_retriable is None: continue @@ -176,11 +194,13 @@ def _is_retriable(e): return isinstance(e, IOError) and e.errno in _RETRIABLE_ERRNOS +@deprecated(None, 'Please use urllib or similar directly.') @retry(initial_delay=1.0, max_delay=16.0, is_retriable=_is_retriable) def urlretrieve_with_retry(url, filename=None): return urllib.request.urlretrieve(url, filename) +@deprecated(None, 'Please write your own downloading logic.') def maybe_download(filename, work_directory, source_url): """Download the data from source url, unless it's already here. diff --git a/tensorflow/contrib/learn/python/learn/datasets/mnist.py b/tensorflow/contrib/learn/python/learn/datasets/mnist.py index 37f9175015..abbb44c2f5 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/mnist.py +++ b/tensorflow/contrib/learn/python/learn/datasets/mnist.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Functions for downloading and reading MNIST data.""" +"""Functions for downloading and reading MNIST data (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -27,6 +32,7 @@ from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.python.framework import dtypes from tensorflow.python.framework import random_seed from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated # CVDF mirror of http://yann.lecun.com/exdb/mnist/ DEFAULT_SOURCE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/' @@ -37,6 +43,7 @@ def _read32(bytestream): return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] +@deprecated(None, 'Please use tf.data to implement this functionality.') def extract_images(f): """Extract the images into a 4D uint8 numpy array [index, y, x, depth]. @@ -65,6 +72,7 @@ def extract_images(f): return data +@deprecated(None, 'Please use tf.one_hot on tensors.') def dense_to_one_hot(labels_dense, num_classes): """Convert class labels from scalars to one-hot vectors.""" num_labels = labels_dense.shape[0] @@ -74,6 +82,7 @@ def dense_to_one_hot(labels_dense, num_classes): return labels_one_hot +@deprecated(None, 'Please use tf.data to implement this functionality.') def extract_labels(f, one_hot=False, num_classes=10): """Extract the labels into a 1D uint8 numpy array [index]. @@ -103,7 +112,15 @@ def extract_labels(f, one_hot=False, num_classes=10): class DataSet(object): + """Container class for a dataset (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ + @deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def __init__(self, images, labels, @@ -210,6 +227,8 @@ class DataSet(object): return self._images[start:end], self._labels[start:end] +@deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def read_data_sets(train_dir, fake_data=False, one_hot=False, @@ -275,5 +294,7 @@ def read_data_sets(train_dir, return base.Datasets(train=train, validation=validation, test=test) +@deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def load_mnist(train_dir='MNIST-data'): return read_data_sets(train_dir) diff --git a/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py b/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py index 6e0ba38941..a4848fa64a 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py +++ b/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Produce DBpedia datasets of a smaller size.""" +"""Produce DBpedia datasets of a smaller size (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py index 9a843168c2..6a0e3350b3 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py +++ b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Synthetic dataset generators.""" +"""Synthetic dataset generators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -21,8 +26,10 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.learn.python.learn.datasets.base import Dataset +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Consider using synthetic datasets from scikits.learn.') def circles(n_samples=100, noise=None, seed=None, @@ -93,6 +100,7 @@ def circles(n_samples=100, return Dataset(data=X[indices], target=y[indices]) +@deprecated(None, 'Consider using synthetic datasets from scikits.learn.') def spirals(n_samples=100, noise=None, seed=None, diff --git a/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py b/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py index 2596a2ecaf..ce94663017 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py +++ b/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Text datasets.""" +"""Text datasets (deprecated). 
+ +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -26,10 +31,12 @@ import numpy as np from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated DBPEDIA_URL = 'https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz' +@deprecated(None, 'See contrib/learn/README.md') def maybe_download_dbpedia(data_dir): """Download if DBpedia data is not present.""" train_path = os.path.join(data_dir, 'dbpedia_csv/train.csv') @@ -41,6 +48,7 @@ def maybe_download_dbpedia(data_dir): tfile.extractall(data_dir) +@deprecated(None, 'See contrib/learn/README.md') def load_dbpedia(size='small', test_with_fake_data=False): """Get DBpedia datasets from CSV files.""" if not test_with_fake_data: diff --git a/tensorflow/contrib/learn/python/learn/estimators/__init__.py b/tensorflow/contrib/learn/python/learn/estimators/__init__.py index 4981750c94..3e64595f31 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/__init__.py +++ b/tensorflow/contrib/learn/python/learn/estimators/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""An estimator is a rule for calculating an estimate of a given quantity. +"""An estimator is a rule for calculating an estimate of a given quantity (deprecated). + +These classes are deprecated and replaced with `tf.estimator`. + +See [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
# Estimators diff --git a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py index 15277415a1..1f0e4663d0 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== -"""sklearn cross-support.""" +"""sklearn cross-support (deprecated).""" from __future__ import absolute_import from __future__ import division @@ -132,6 +132,8 @@ class _TransformerMixin(): class NotFittedError(ValueError, AttributeError): """Exception class to raise if estimator is used before fitting. + USE OF THIS EXCEPTION IS DEPRECATED. + This class inherits from both ValueError and AttributeError to help with exception handling and backward compatibility. diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model.py index a02c726c74..1fa58271e2 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/composable_model.py +++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TensorFlow composable models used as building blocks for estimators.""" +"""TensorFlow composable models used as building blocks for estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -34,6 +39,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.summary import summary +from tensorflow.python.util.deprecation import deprecated class _ComposableModel(object): @@ -46,6 +52,7 @@ class _ComposableModel(object): _ComposableModel and its subclasses are not part of the public tf.learn API. """ + @deprecated(None, "Please use model_fns in tf.estimator.") def __init__(self, num_label_columns, optimizer, @@ -141,6 +148,10 @@ class _ComposableModel(object): class LinearComposableModel(_ComposableModel): """A _ComposableModel that implements linear regression. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Instances of this class can be used to build estimators through the use of composition. """ @@ -252,6 +263,10 @@ class LinearComposableModel(_ComposableModel): class DNNComposableModel(_ComposableModel): """A _ComposableModel that implements a DNN. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Instances of this class can be used to build estimators through the use of composition. """ diff --git a/tensorflow/contrib/learn/python/learn/estimators/constants.py b/tensorflow/contrib/learn/python/learn/estimators/constants.py index fc69e81024..d2548946bc 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/constants.py +++ b/tensorflow/contrib/learn/python/learn/estimators/constants.py @@ -13,9 +13,11 @@ # limitations under the License. # ============================================================================== -"""Constants regarding Estimators. +"""Constants regarding Estimators (deprecated). 
-This file is obsoleted in the move of Estimator to core. +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. """ from __future__ import absolute_import from __future__ import division @@ -25,6 +27,8 @@ from __future__ import print_function class ProblemType(object): """Enum-like values for the type of problem that the model solves. + THIS CLASS IS DEPRECATED. + These values are used when exporting the model to produce the appropriate signature function for serving. diff --git a/tensorflow/contrib/learn/python/learn/estimators/debug.py b/tensorflow/contrib/learn/python/learn/estimators/debug.py index 9d5f6c2bf9..24b067b7e3 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/debug.py +++ b/tensorflow/contrib/learn/python/learn/estimators/debug.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Debug estimators. +"""Debug estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Debug estimators are bias-only estimators that can be used for debugging and as simple baselines. @@ -118,6 +122,10 @@ def debug_model_fn(features, labels, mode, params, config=None): class DebugClassifier(estimator.Estimator): """A classifier for TensorFlow Debug models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -237,6 +245,10 @@ class DebugClassifier(estimator.Estimator): class DebugRegressor(estimator.Estimator): """A regressor for TensorFlow Debug models. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py index c17b41c0f7..eabebb7e88 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Deep Neural Network estimators.""" +"""Deep Neural Network estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -212,6 +217,10 @@ def _dnn_model_fn(features, labels, mode, params, config=None): class DNNClassifier(estimator.Estimator): """A classifier for TensorFlow DNN models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -521,6 +530,10 @@ class DNNClassifier(estimator.Estimator): class DNNRegressor(estimator.Estimator): """A regressor for TensorFlow DNN models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -796,6 +809,10 @@ class DNNRegressor(estimator.Estimator): class DNNEstimator(estimator.Estimator): """A Estimator for TensorFlow DNN models with user specified _Head. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py index 7266122350..3d85533d92 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow estimators for Linear and DNN joined training models.""" +"""TensorFlow estimators for Linear and DNN joined training models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -372,6 +377,10 @@ def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None): class DNNLinearCombinedEstimator(estimator.Estimator): """An estimator for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. @@ -490,6 +499,10 @@ class DNNLinearCombinedEstimator(estimator.Estimator): class DNNLinearCombinedClassifier(estimator.Estimator): """A classifier for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. 
@@ -832,6 +845,10 @@ class DNNLinearCombinedClassifier(estimator.Estimator): class DNNLinearCombinedRegressor(estimator.Estimator): """A regressor for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py index 69440e823e..a703dc66e9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Estimator for Dynamic RNNs.""" +"""Estimator for Dynamic RNNs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -540,6 +545,12 @@ def _get_dynamic_rnn_model_fn( class DynamicRnnEstimator(estimator.Estimator): + """Dynamically unrolled RNN (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ def __init__(self, problem_type, diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 4b63e08ab3..5262e04e16 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Base Estimator class.""" +"""Base Estimator class (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -138,6 +143,7 @@ def _get_input_fn(x, y, input_fn, feed_fn, batch_size, shuffle=False, epochs=1): return df.input_builder, df.get_feed_dict_fn() +@deprecated(None, 'Please specify feature columns explicitly.') def infer_real_valued_columns_from_input_fn(input_fn): """Creates `FeatureColumn` objects for inputs defined by `input_fn`. @@ -158,6 +164,7 @@ def infer_real_valued_columns_from_input_fn(input_fn): return layers.infer_real_valued_columns(features) +@deprecated(None, 'Please specify feature columns explicitly.') def infer_real_valued_columns_from_input(x): """Creates `FeatureColumn` objects for inputs defined by input `x`. @@ -389,6 +396,10 @@ class BaseEstimator(sklearn.BaseEstimator, evaluable.Evaluable, trainable.Trainable): """Abstract BaseEstimator class to train and evaluate TensorFlow models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Users should not instantiate or subclass this class. Instead, use an `Estimator`. 
""" @@ -399,6 +410,8 @@ class BaseEstimator(sklearn.BaseEstimator, evaluable.Evaluable, # TODO(wicke): Remove this once launcher takes over config functionality _Config = run_config.RunConfig # pylint: disable=invalid-name + @deprecated(None, 'Please replace uses of any Estimator from tf.contrib.learn' + ' with an Estimator from tf.estimator.*') def __init__(self, model_dir=None, config=None): """Initializes a BaseEstimator instance. @@ -1074,6 +1087,10 @@ def _identity_feature_engineering_fn(features, labels): class Estimator(BaseEstimator): """Estimator class is the basic TensorFlow model trainer/evaluator. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. """ def __init__(self, @@ -1458,8 +1475,14 @@ class Estimator(BaseEstimator): # For time of deprecation x,y from Estimator allow direct access. # pylint: disable=protected-access class SKCompat(sklearn.BaseEstimator): - """Scikit learn wrapper for TensorFlow Learn Estimator.""" + """Scikit learn wrapper for TensorFlow Learn Estimator. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please switch to the Estimator interface.') def __init__(self, estimator): self._estimator = estimator diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py index fd47710e30..e4c31396ba 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Utils for Estimator.""" +"""Utils for Estimator (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 9b124b2c19..2b4b6eff39 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Abstractions for the head(s) of a model. +"""Abstractions for the head(s) of a model (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. """ + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -47,11 +52,16 @@ from tensorflow.python.summary import summary from tensorflow.python.training import training from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated class Head(object): """Interface for the head/top of a model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Given logits (or output of a hidden layer), a Head knows how to compute predictions, loss, default metric and export signature. 
It is meant to, @@ -177,6 +187,7 @@ class Head(object): raise NotImplementedError("Calling an abstract method.") +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def regression_head(label_name=None, weight_column_name=None, label_dimension=1, @@ -216,6 +227,7 @@ def regression_head(label_name=None, link_fn=(link_fn if link_fn is not None else array_ops.identity)) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def poisson_regression_head(label_name=None, weight_column_name=None, label_dimension=1, @@ -254,6 +266,7 @@ def poisson_regression_head(label_name=None, # TODO(zakaria): Consider adding a _RegressionHead for logistic_regression +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_class_head(n_classes, label_name=None, weight_column_name=None, @@ -335,6 +348,7 @@ def multi_class_head(n_classes, label_keys=label_keys) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def binary_svm_head( label_name=None, weight_column_name=None, @@ -370,6 +384,7 @@ def binary_svm_head( thresholds=thresholds) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_label_head(n_classes, label_name=None, weight_column_name=None, @@ -430,6 +445,7 @@ def multi_label_head(n_classes, loss_fn=_wrap_custom_loss_fn(loss_fn) if loss_fn else None) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def loss_only_head(loss_fn, head_name=None): """Creates a Head that contains only loss terms. @@ -447,6 +463,7 @@ def loss_only_head(loss_fn, head_name=None): return _LossOnlyHead(loss_fn, head_name=head_name) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_head(heads, loss_weights=None): """Creates a MultiHead stemming from same logits/hidden layer. 
@@ -479,6 +496,7 @@ def multi_head(heads, loss_weights=None): return _MultiHead(heads, loss_merger=_weighted_loss_merger) +@deprecated(None, "Use 'lambda _: tf.no_op()'.") def no_op_train_fn(loss): del loss return control_flow_ops.no_op() diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py index 8f9d6fc318..66ebcfd1d8 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py +++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implementation of k-means clustering on top of `Estimator` API. +"""Implementation of k-means clustering on top of `Estimator` API (deprecated). This module is deprecated. Please use @{tf.contrib.factorization.KMeansClustering} instead of @@ -153,7 +153,12 @@ def _kmeans_clustering_model_fn(features, labels, mode, params, config): # TODO(agarwal,ands): support sharded input. class KMeansClustering(estimator.Estimator): - """An Estimator for K-Means clustering.""" + """An Estimator for K-Means clustering. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ SQUARED_EUCLIDEAN_DISTANCE = clustering_ops.SQUARED_EUCLIDEAN_DISTANCE COSINE_DISTANCE = clustering_ops.COSINE_DISTANCE RANDOM_INIT = clustering_ops.RANDOM_INIT diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index 37aa8b3396..64d7ecc68e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -13,7 +13,12 @@ # limitations under the License. 
# ============================================================================== -"""Linear Estimators.""" +"""Linear Estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -305,6 +310,10 @@ class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook): class LinearClassifier(estimator.Estimator): """Linear classifier model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a linear model to classify instances into one of multiple possible classes. When number of possible classes is 2, this is binary classification. @@ -625,6 +634,10 @@ class LinearClassifier(estimator.Estimator): class LinearRegressor(estimator.Estimator): """Linear regressor model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a linear regression model to predict label value given observation of feature values. @@ -860,6 +873,10 @@ class LinearRegressor(estimator.Estimator): class LinearEstimator(estimator.Estimator): """Linear model with user specified head. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a generalized linear model to predict label value given observation of feature values. 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py index fb339160d5..3cbcc6e98d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py +++ b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Logistic regression (aka binary classifier) class. +"""Logistic regression (aka binary classifier) class (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This defines some useful basic metrics for using logistic regression to classify a binary event (0 vs 1). @@ -75,6 +79,10 @@ def LogisticRegressor( # pylint: disable=invalid-name feature_engineering_fn=None): """Builds a logistic regression Estimator for binary classification. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This method provides a basic Estimator with some additional metrics for custom binary classification models, including AUC, precision/recall and accuracy. diff --git a/tensorflow/contrib/learn/python/learn/estimators/metric_key.py b/tensorflow/contrib/learn/python/learn/estimators/metric_key.py index 99388f116b..f264248e44 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/metric_key.py +++ b/tensorflow/contrib/learn/python/learn/estimators/metric_key.py @@ -12,14 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Enum for metric keys.""" +"""Enum for metric keys (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function class MetricKey(object): - """Metric key strings.""" + """Metric key strings (deprecated).""" + LOSS = "loss" AUC = "auc" AUC_PR = "auc_precision_recall" diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py index 44e6c7c52d..dcb161180c 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Classes and methods related to model_fn.""" +"""Classes and methods related to model_fn (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -37,10 +42,13 @@ from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants from tensorflow.python.training import session_run_hook +from tensorflow.python.util.deprecation import deprecated class ModeKeys(object): - """Standard names for model modes. + """Standard names for model modes (deprecated). + + THIS CLASS IS DEPRECATED. 
The following standard keys are defined: @@ -65,8 +73,16 @@ class ModelFnOps( 'output_alternatives', 'training_chief_hooks', 'training_hooks', 'scaffold', 'mode' ])): - """Ops returned from a model_fn.""" + """Ops returned from a model_fn. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'When switching to tf.estimator.Estimator, use ' + 'tf.estimator.EstimatorSpec. You can use the `estimator_spec`' + ' method to create an equivalent one.') def __new__(cls, mode, predictions=None, diff --git a/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py b/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py index f8d87b8914..6fd2fc9d59 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py +++ b/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Enum for model prediction keys. +"""Enum for model prediction keys (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This file is obsoleted in the move of Estimator to core. 
""" @@ -22,6 +26,8 @@ from __future__ import print_function class PredictionKey(object): + """THIS CLASS IS DEPRECATED.""" + CLASSES = "classes" PROBABILITIES = "probabilities" LOGITS = "logits" diff --git a/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py b/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py index 2752bc2d90..215022e5d9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py +++ b/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Common operations for RNN Estimators.""" +"""Common operations for RNN Estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py index fd90fd1cc6..1d161093de 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Run Config.""" +"""Run Config (deprecated, use tf.estimator.RunConfig instead). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -29,11 +34,12 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as core_run_config from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib +from tensorflow.python.util.deprecation import deprecated # A list of the property names in RunConfig user allows to change. They will # not affect the execution framework, so when execution framework checks the -# `uid` of the RunConfig, it should be ingored. +# `uid` of the RunConfig, it should be ignored. _DEFAULT_UID_WHITE_LIST = [ 'tf_random_seed', 'save_summary_steps', @@ -47,6 +53,7 @@ _DEFAULT_UID_WHITE_LIST = [ class Environment(object): + """DEPRECATED CLASS.""" # For running general distributed training. CLOUD = 'cloud' # For running Google-internal distributed training. @@ -56,6 +63,7 @@ class Environment(object): class TaskType(object): + """DEPRECATED CLASS.""" MASTER = 'master' PS = 'ps' WORKER = 'worker' @@ -64,6 +72,8 @@ class TaskType(object): class ClusterConfig(object): """This class specifies the configurations for a distributed run. + THIS CLASS IS DEPRECATED. Use tf.estimator.RunConfig instead. + If you're using an `Estimator`, you should probably use the subclass RunConfig instead. """ @@ -211,10 +221,13 @@ class ClusterConfig(object): class RunConfig(ClusterConfig, core_run_config.RunConfig): """This class specifies the configurations for an `Estimator` run. - This class is the implementation of @{tf.estimator.RunConfig} interface. + This class is a deprecated implementation of @{tf.estimator.RunConfig} + interface. 
""" _USE_DEFAULT = 0 + @deprecated(None, 'When switching to tf.estimator.Estimator, use' + ' tf.estimator.RunConfig instead.') def __init__(self, master=None, num_cores=0, diff --git a/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py index 0cea35e219..de78c72c3a 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Estimator for State Saving RNNs.""" +"""Estimator for State Saving RNNs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -528,6 +533,12 @@ def _get_rnn_model_fn(cell_type, class StateSavingRnnEstimator(estimator.Estimator): + """RNN with static unrolling and state saving (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, problem_type, diff --git a/tensorflow/contrib/learn/python/learn/estimators/svm.py b/tensorflow/contrib/learn/python/learn/estimators/svm.py index 72920d73c0..3459997bab 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/svm.py +++ b/tensorflow/contrib/learn/python/learn/estimators/svm.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Support Vector Machine (SVM) Estimator.""" +"""Support Vector Machine (SVM) Estimator (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -36,6 +41,10 @@ def _as_iterable(preds, output): class SVM(estimator.Estimator): """Support Vector Machine (SVM) model for binary classification. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Currently, only linear SVMs are supported. For the underlying optimization problem, the `SDCAOptimizer` is used. For performance and convergence tuning, the num_loss_partitions parameter passed to `SDCAOptimizer` (see `__init__()` diff --git a/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py b/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py index a120bc6cc3..71b5658dd1 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py +++ b/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorSignature class and utilities.""" +"""TensorSignature class and utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -33,6 +38,10 @@ class TensorSignature(collections.namedtuple( "TensorSignature", ["dtype", "shape", "is_sparse"])): """Signature of the `Tensor` object. 
+ THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Useful to check compatibility of tensors. Example: diff --git a/tensorflow/contrib/learn/python/learn/estimators/test_data.py b/tensorflow/contrib/learn/python/learn/estimators/test_data.py index ed201bfc58..e4b057b4f5 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/test_data.py +++ b/tensorflow/contrib/learn/python/learn/estimators/test_data.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Test data utilities.""" +"""Test data utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/evaluable.py b/tensorflow/contrib/learn/python/learn/evaluable.py index 8f6cd39864..10881ca885 100644 --- a/tensorflow/contrib/learn/python/learn/evaluable.py +++ b/tensorflow/contrib/learn/python/learn/evaluable.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""`Evaluable` interface.""" +"""`Evaluable` interface (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -23,6 +28,10 @@ import abc class Evaluable(object): """Interface for objects that are evaluatable by, e.g., `Experiment`. 
+ + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index 331bc11549..9a7c4cd685 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experiment class collecting information needed for a single training run.""" +"""Experiment class collecting information for a single training run (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -25,7 +30,6 @@ import os import time from tensorflow.contrib.framework import deprecated -from tensorflow.contrib.framework import deprecated_args from tensorflow.contrib.framework.python.framework import experimental from tensorflow.contrib.learn.python.learn import evaluable from tensorflow.contrib.learn.python.learn import export_strategy @@ -118,6 +122,10 @@ class _EvalAndExportListener(basic_session_run_hooks.CheckpointSaverListener): class Experiment(object): """Experiment is a class containing all information needed to train a model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ After an experiment is created (by passing an Estimator and inputs for training and evaluation), an Experiment instance knows how to invoke training and eval loops in a sensible fashion for distributed training. @@ -125,16 +133,8 @@ class Experiment(object): # TODO(ispir): remove delay_workers_by_global_step and make global step based # waiting as only behavior. - @deprecated_args( - "2016-10-23", - "local_eval_frequency is deprecated as local_run will be renamed to " - "train_and_evaluate. Use min_eval_frequency and call train_and_evaluate " - "instead. Note, however, that the default for min_eval_frequency is 1, " - "meaning models will be evaluated every time a new checkpoint is " - "available. In contrast, the default for local_eval_frequency is None, " - "resulting in evaluation occurring only after training has completed. " - "min_eval_frequency is ignored when calling the deprecated local_run.", - "local_eval_frequency") + @deprecated(None, "Please switch to tf.estimator.train_and_evaluate. You will" + " also have to convert to a tf.estimator.Estimator.") def __init__(self, estimator, train_input_fn, diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py index 55a8b82431..075cab536e 100644 --- a/tensorflow/contrib/learn/python/learn/export_strategy.py +++ b/tensorflow/contrib/learn/python/learn/export_strategy.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""ExportStrategy class represents different flavors of model export.""" +"""ExportStrategy class represents different flavors of model export (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -21,6 +26,7 @@ from __future__ import print_function import collections from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated __all__ = ['ExportStrategy'] @@ -30,6 +36,10 @@ class ExportStrategy( ['name', 'export_fn', 'strip_default_attrs'])): """A class representing a type of model export. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Typically constructed by a utility function specific to the exporter, such as `saved_model_export_utils.make_export_strategy()`. @@ -56,6 +66,8 @@ class ExportStrategy( forward compatibility of the resulting `SavedModel`. """ + @deprecated(None, 'Please switch to tf.estimator.train_and_evaluate, and use ' + 'tf.estimator.Exporter.') def __new__(cls, name, export_fn, strip_default_attrs=None): return super(ExportStrategy, cls).__new__( cls, name, export_fn, strip_default_attrs) diff --git a/tensorflow/contrib/learn/python/learn/graph_actions.py b/tensorflow/contrib/learn/python/learn/graph_actions.py index 98365c05f6..a997fab723 100644 --- a/tensorflow/contrib/learn/python/learn/graph_actions.py +++ b/tensorflow/contrib/learn/python/learn/graph_actions.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level operations on graphs.""" +"""High level operations on graphs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -68,6 +73,7 @@ def clear_summary_writers(): return summary_io.SummaryWriterCache.clear() +@deprecated(None, 'Use `SummaryWriterCache.get` directly.') def get_summary_writer(logdir): """Returns single SummaryWriter per logdir in current run. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/__init__.py b/tensorflow/contrib/learn/python/learn/learn_io/__init__.py index 06c3782a47..8b133a4440 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/__init__.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/__init__.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tools to allow different io formats.""" +"""Tools to allow different io formats (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py b/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py index 7d666391ce..e0a1948d95 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Methods to allow dask.DataFrame.""" +"""Methods to allow dask.DataFrame (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -21,6 +26,8 @@ from __future__ import print_function import numpy as np +from tensorflow.python.util.deprecation import deprecated + try: # pylint: disable=g-import-not-at-top import dask.dataframe as dd @@ -60,6 +67,7 @@ def _construct_dask_df_with_divisions(df): return dd.Series(merge(dsk, df.dask), name, df.name, divisions) +@deprecated(None, 'Please feed input to tf.data to support dask.') def extract_dask_data(data): """Extract data from dask.Series or dask.DataFrame for predictors. @@ -81,6 +89,7 @@ def extract_dask_data(data): return data +@deprecated(None, 'Please feed input to tf.data to support dask.') def extract_dask_labels(labels): """Extract data from dask.Series or dask.DataFrame for labels. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index 96be8b1bc4..c45b1d1864 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implementations of different data feeders to provide data for TF trainer.""" +"""Implementations of different data feeders to provide data for TF trainer (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" # TODO(ipolosukhin): Replace this module with feed-dict queue runners & queues. 
@@ -31,6 +36,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.deprecation import deprecated # pylint: disable=g-multiple-import,g-bad-import-order from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels @@ -101,6 +107,7 @@ def _is_iterable(x): return hasattr(x, 'next') or hasattr(x, '__next__') +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_train_data_feeder(x, y, n_classes, @@ -188,6 +195,7 @@ def _batch_data(x, batch_size=None): yield np.matrix(chunk) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_predict_data_feeder(x, batch_size=None): """Returns an iterable for feeding into predict step. @@ -219,6 +227,7 @@ def setup_predict_data_feeder(x, batch_size=None): return [x] +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_processor_data_feeder(x): """Sets up processor iterable. @@ -233,6 +242,7 @@ def setup_processor_data_feeder(x): return x +@deprecated(None, 'Please convert numpy dtypes explicitly.') def check_array(array, dtype): """Checks array on dtype and converts it if different. @@ -275,8 +285,14 @@ def _check_dtype(dtype): class DataFeeder(object): - """Data feeder is an example class to sample data for TF trainer.""" + """Data feeder is an example class to sample data for TF trainer. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, x, y, @@ -563,6 +579,10 @@ class DataFeeder(object): class StreamingDataFeeder(DataFeeder): """Data feeder for TF trainer that reads data from iterator. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Streaming data feeder allows to read data as it comes it from disk or somewhere else. It's custom to have this iterators rotate infinetly over the dataset, to allow control of how much to learn on the trainer side. @@ -771,11 +791,16 @@ class StreamingDataFeeder(DataFeeder): class DaskDataFeeder(object): """Data feeder for that reads data from dask.Series and dask.DataFrame. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Numpy arrays can be serialized to disk and it's possible to do random seeks into them. DaskDataFeeder will remove requirement to have full dataset in the memory and still do random seeks for sampling of batches. """ + @deprecated(None, 'Please feed input to tf.data to support dask.') def __init__(self, x, y, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py b/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py index 884faf8335..f8aaa0c9e3 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Methods to allow generator of dict with numpy arrays.""" +"""Methods to allow generator of dict with numpy arrays (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -23,8 +28,10 @@ from types import FunctionType from types import GeneratorType from tensorflow.python.estimator.inputs.queues.feeding_functions import _enqueue_data as enqueue_data +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Please use tf.data.') def generator_input_fn(x, target_key=None, batch_size=128, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py index 3a46c23968..9e816f54b6 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Methods to read data in the graph.""" +"""Methods to read data in the graph (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -34,11 +39,13 @@ from tensorflow.python.platform import gfile from tensorflow.python.summary import summary from tensorflow.python.training import input as input_ops from tensorflow.python.training import queue_runner +from tensorflow.python.util.deprecation import deprecated # Default name for key in the feature dict. 
KEY_FEATURE_NAME = '__key__' +@deprecated(None, 'Use tf.data.') def read_batch_examples(file_pattern, batch_size, reader, @@ -106,6 +113,7 @@ def read_batch_examples(file_pattern, return examples +@deprecated(None, 'Use tf.data.') def read_keyed_batch_examples(file_pattern, batch_size, reader, @@ -175,6 +183,7 @@ def read_keyed_batch_examples(file_pattern, seed=seed) +@deprecated(None, 'Use tf.data.') def read_keyed_batch_examples_shared_queue(file_pattern, batch_size, reader, @@ -452,6 +461,7 @@ def _read_keyed_batch_examples_helper(file_pattern, return queued_examples_with_keys +@deprecated(None, 'Use tf.data.') def read_keyed_batch_features(file_pattern, batch_size, features, @@ -540,6 +550,7 @@ def read_keyed_batch_features(file_pattern, name=scope) +@deprecated(None, 'Use tf.data.') def read_keyed_batch_features_shared_queue(file_pattern, batch_size, features, @@ -620,6 +631,7 @@ def read_keyed_batch_features_shared_queue(file_pattern, name=scope) +@deprecated(None, 'Use tf.data.') def queue_parsed_features(parsed_features, keys=None, feature_queue_capacity=100, @@ -742,6 +754,7 @@ def queue_parsed_features(parsed_features, return dequeued_keys, dequeued_parsed_features +@deprecated(None, 'Use tf.data.') def read_batch_features(file_pattern, batch_size, features, @@ -821,6 +834,7 @@ def read_batch_features(file_pattern, return features +@deprecated(None, 'Use tf.data.') def read_batch_record_features(file_pattern, batch_size, features, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py b/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py index 692438807f..29552d24f1 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py @@ -12,15 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Methods to allow dict of numpy arrays.""" +"""Methods to allow dict of numpy arrays (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.estimator.inputs.numpy_io import numpy_input_fn as core_numpy_input_fn +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Use tf.estimator.inputs.numpy_input_fn.') def numpy_input_fn(x, y=None, batch_size=128, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py b/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py index ede7558eaf..b4ef055f5a 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py @@ -13,13 +13,19 @@ # limitations under the License. # ============================================================================== -"""Methods to allow pandas.DataFrame.""" +"""Methods to allow pandas.DataFrame (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.estimator.inputs.pandas_io import pandas_input_fn as core_pandas_input_fn +from tensorflow.python.util.deprecation import deprecated try: # pylint: disable=g-import-not-at-top @@ -47,6 +53,7 @@ PANDAS_DTYPES = { } +@deprecated(None, 'Please use tf.estimator.inputs.pandas_input_fn') def pandas_input_fn(x, y=None, batch_size=128, @@ -66,6 +73,7 @@ def pandas_input_fn(x, target_column=target_column) +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_data(data): """Extract data from pandas.DataFrame for predictors. @@ -96,6 +104,7 @@ def extract_pandas_data(data): 'float, or bool. Found: ' + ', '.join(error_report)) +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_matrix(data): """Extracts numpy matrix from pandas DataFrame. @@ -111,6 +120,7 @@ def extract_pandas_matrix(data): return data.as_matrix() +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_labels(labels): """Extract data from pandas.DataFrame for labels. diff --git a/tensorflow/contrib/learn/python/learn/learn_runner.py b/tensorflow/contrib/learn/python/learn/learn_runner.py index 2af723a0d6..d719a3e488 100644 --- a/tensorflow/contrib/learn/python/learn/learn_runner.py +++ b/tensorflow/contrib/learn/python/learn/learn_runner.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Runs an Experiment.""" +"""Runs an Experiment (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,7 @@ from tensorflow.contrib.learn.python.learn.estimators import run_config as run_c from tensorflow.contrib.learn.python.learn.experiment import Experiment from tensorflow.contrib.training.python.training import hparam as hparam_lib from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.deprecation import deprecated # TODO(xiejw): Refactor the learn_runner to make code reusable. @@ -99,6 +105,7 @@ def _wrapped_experiment_fn_with_uid_check(experiment_fn, require_hparams=False): return wrapped_experiment_fn +@deprecated(None, 'Use tf.estimator.train_and_evaluate.') def run(experiment_fn, output_dir=None, schedule=None, run_config=None, hparams=None): """Make and run an experiment. @@ -218,6 +225,7 @@ def run(experiment_fn, output_dir=None, schedule=None, run_config=None, return _execute_schedule(experiment, schedule) +@deprecated(None, 'Use tf.estimator.train_and_evaluate.') def tune(experiment_fn, tuner): """Tune an experiment with hyper-parameters. diff --git a/tensorflow/contrib/learn/python/learn/learn_runner_lib.py b/tensorflow/contrib/learn/python/learn/learn_runner_lib.py index 7d9b1c7716..ba2d067787 100644 --- a/tensorflow/contrib/learn/python/learn/learn_runner_lib.py +++ b/tensorflow/contrib/learn/python/learn/learn_runner_lib.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities to run and tune an Experiment. +"""Utilities to run and tune an Experiment (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
@@run @@tune diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py index 6440bc204b..97220365d5 100644 --- a/tensorflow/contrib/learn/python/learn/metric_spec.py +++ b/tensorflow/contrib/learn/python/learn/metric_spec.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""The metric spec class to flexibly connect models and metrics.""" +"""The metric spec class to flexibly connect models and metrics (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,7 @@ import six from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated def _assert_named_args(sentinel): @@ -223,6 +229,10 @@ def _adapt_metric_fn( class MetricSpec(object): """MetricSpec connects a model to metric functions. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + The MetricSpec class contains all information necessary to connect the output of a `model_fn` to the metrics (usually, streaming metrics) that are used in evaluation. 
@@ -284,6 +294,7 @@ class MetricSpec(object): """ + @deprecated(None, 'Use tf.estimator.EstimatorSpec.eval_metric_ops.') def __init__(self, metric_fn, prediction_key=None, diff --git a/tensorflow/contrib/learn/python/learn/models.py b/tensorflow/contrib/learn/python/learn/models.py index 4283240d01..bd4bbf9f8c 100644 --- a/tensorflow/contrib/learn/python/learn/models.py +++ b/tensorflow/contrib/learn/python/learn/models.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Various high level TF models.""" +"""Various high level TF models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -28,8 +33,10 @@ from tensorflow.python.ops import array_ops as array_ops_ from tensorflow.python.ops import init_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.summary import summary +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Consider using a tf.estimator.LinearRegressor') def linear_regression_zero_init(x, y): """Linear regression subgraph with zero-value initial weights and bias. @@ -43,6 +50,7 @@ def linear_regression_zero_init(x, y): return linear_regression(x, y, init_mean=0.0, init_stddev=0.0) +@deprecated(None, 'Consider using a class from tf.estimator.LinearClassifier') def logistic_regression_zero_init(x, y): """Logistic regression subgraph with zero-value initial weights and bias. 
@@ -56,6 +64,7 @@ def logistic_regression_zero_init(x, y): return logistic_regression(x, y, init_mean=0.0, init_stddev=0.0) +@deprecated(None, 'Consider using a class from tf.estimator.') def linear_regression(x, y, init_mean=None, init_stddev=1.0): """Creates linear regression TensorFlow subgraph. @@ -107,6 +116,7 @@ def linear_regression(x, y, init_mean=None, init_stddev=1.0): return losses_ops.mean_squared_error_regressor(x, y, weights, bias) +@deprecated(None, 'Consider using a class from tf.estimator.') def logistic_regression(x, y, class_weight=None, @@ -203,6 +213,7 @@ def _reverse_seq(input_seq, lengths): return result +@deprecated(None, 'Please consider `tf.nn.bidirectional_dynamic_rnn`.') def bidirectional_rnn(cell_fw, cell_bw, inputs, @@ -283,6 +294,7 @@ def bidirectional_rnn(cell_fw, # End of TensorFlow 0.7 +@deprecated(None, 'Please consider tensorflow/tensor2tensor.') def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional, target_predictor_fn, sequence_length, initial_state, attn_length, attn_size, attn_vec_size): diff --git a/tensorflow/contrib/learn/python/learn/monitored_session.py b/tensorflow/contrib/learn/python/learn/monitored_session.py index 22602e9f69..ac0433f177 100644 --- a/tensorflow/contrib/learn/python/learn/monitored_session.py +++ b/tensorflow/contrib/learn/python/learn/monitored_session.py @@ -13,7 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A wrapper of Session API which runs hooks.""" +"""A wrapper of Session API which runs hooks (deprecated). + +These are deprecated aliases for classes and functions in `tf.train`. Please use +those directly. 
+""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py index 9457a73ecf..77f7c73d54 100644 --- a/tensorflow/contrib/learn/python/learn/monitors.py +++ b/tensorflow/contrib/learn/python/learn/monitors.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Monitors instrument the training process. +"""Monitors instrument the training process (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. @@get_default_monitors @@BaseMonitor @@ -59,6 +63,10 @@ from tensorflow.python.util import tf_inspect class BaseMonitor(object): """Base class for Monitors. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Defines basic interfaces of Monitors. Monitors can either be run on all workers or, more commonly, restricted to run exclusively on the elected chief worker. @@ -229,6 +237,10 @@ def _extract_output(outputs, request): class EveryN(BaseMonitor): """Base class for monitors that execute callbacks every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This class adds three new callbacks: - every_n_step_begin - every_n_step_end @@ -418,6 +430,10 @@ class StopAtStep(BaseMonitor): class PrintTensor(EveryN): """Prints given tensors every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ This is an `EveryN` monitor and has consistent semantic for `every_n` and `first_n`. @@ -455,9 +471,12 @@ class PrintTensor(EveryN): class LoggingTrainable(EveryN): """Writes trainable variable values into log every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Write the tensors in trainable variables `every_n` steps, starting with the `first_n`th step. - """ def __init__(self, scope=None, every_n=100, first_n=1): @@ -493,7 +512,12 @@ class LoggingTrainable(EveryN): class SummarySaver(EveryN): - """Saves summaries every N steps.""" + """Saves summaries every N steps. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, summary_op, @@ -554,6 +578,10 @@ class SummarySaver(EveryN): class ValidationMonitor(EveryN): """Runs evaluation of a given estimator, at most every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note that the evaluation is done based on the saved checkpoint, which will usually be older than the current step. @@ -756,6 +784,10 @@ class ValidationMonitor(EveryN): class CaptureVariable(EveryN): """Captures a variable's values into a collection. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This monitor is useful for unit testing. You should exercise caution when using this monitor in production, since it never discards values. 
@@ -794,6 +826,7 @@ class CaptureVariable(EveryN): self._var_values[step] = _extract_output(outputs, self._var_name) +@deprecation.deprecated(None, "Use tf.train.MonitoredTrainingSession.") def get_default_monitors(loss_op=None, summary_op=None, save_summary_steps=100, @@ -828,6 +861,10 @@ def get_default_monitors(loss_op=None, class GraphDump(BaseMonitor): """Dumps almost all tensors in the graph at every step. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note, this is very expensive, prefer `PrintTensor` in production. """ @@ -917,7 +954,12 @@ class GraphDump(BaseMonitor): class ExportMonitor(EveryN): - """Monitor that exports Estimator every N steps.""" + """Monitor that exports Estimator every N steps. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ @deprecation.deprecated("2017-03-25", "ExportMonitor is deprecated. Please pass an " @@ -1040,7 +1082,12 @@ class ExportMonitor(EveryN): class CheckpointSaver(BaseMonitor): - """Saves checkpoints every N steps or N seconds.""" + """Saves checkpoints every N steps or N seconds. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, checkpoint_dir, @@ -1125,7 +1172,12 @@ class CheckpointSaver(BaseMonitor): class StepCounter(EveryN): - """Steps per second monitor.""" + """Steps per second monitor. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ def __init__(self, every_n_steps=100, output_dir=None, summary_writer=None): super(StepCounter, self).__init__(every_n_steps=every_n_steps) @@ -1165,6 +1217,10 @@ class NanLossDuringTrainingError(RuntimeError): class NanLoss(EveryN): """NaN Loss monitor. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Monitors loss and stops training if loss is NaN. Can either fail with exception or just stop training. """ diff --git a/tensorflow/contrib/learn/python/learn/ops/__init__.py b/tensorflow/contrib/learn/python/learn/ops/__init__.py index 33962e34cc..efb1f47cf5 100644 --- a/tensorflow/contrib/learn/python/learn/ops/__init__.py +++ b/tensorflow/contrib/learn/python/learn/ops/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Various TensorFlow Ops.""" +"""Various TensorFlow Ops (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py index fa3b7323e3..b3b067b8e1 100644 --- a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py @@ -13,7 +13,11 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops to work with embeddings. +"""TensorFlow Ops to work with embeddings (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
Note: categorical variables are handled via embeddings in many cases. For example, in case of words. diff --git a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py index b040ab3bb6..92976d1539 100644 --- a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops for loss computation.""" +"""TensorFlow Ops for loss computation (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py b/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py index 45727faab4..aa37cb4a76 100644 --- a/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops for Sequence to Sequence models.""" +"""TensorFlow Ops for Sequence to Sequence models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -26,8 +31,10 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): """Returns predictions and loss for sequence of predictions. @@ -57,6 +64,7 @@ def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): return array_ops.stack(predictions, axis=1), loss +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def seq2seq_inputs(x, y, input_length, output_length, sentinel=None, name=None): """Processes inputs for Sequence to Sequence models. @@ -87,6 +95,7 @@ def seq2seq_inputs(x, y, input_length, output_length, sentinel=None, name=None): return in_x, in_y, out_y +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def rnn_decoder(decoder_inputs, initial_state, cell, scope=None): """RNN Decoder that creates training and sampling sub-graphs. @@ -123,6 +132,7 @@ def rnn_decoder(decoder_inputs, initial_state, cell, scope=None): return outputs, states, sampling_outputs, sampling_states +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def rnn_seq2seq(encoder_inputs, decoder_inputs, encoder_cell, diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py b/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py index 7bcc177d4e..e8c6e1acf8 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. 
# ============================================================================== -"""Preprocessing tools useful for building models.""" +"""Preprocessing tools useful for building models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py b/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py index 154739d497..faba3b2025 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Implements preprocessing transformers for categorical variables.""" +"""Implements preprocessing transformers for categorical variables (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,8 @@ from __future__ import print_function import math import numpy as np +from tensorflow.python.util.deprecation import deprecated + # pylint: disable=g-bad-import-order from . import categorical_vocabulary from ..learn_io.data_feeder import setup_processor_data_feeder @@ -31,10 +38,16 @@ from ..learn_io.data_feeder import setup_processor_data_feeder class CategoricalProcessor(object): """Maps documents to sequences of word ids. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ As a common convention, Nan values are handled as unknown tokens. Both float('nan') and np.nan are accepted. """ + @deprecated(None, 'Please use tensorflow/transform or tf.data for sequence ' + 'processing.') def __init__(self, min_frequency=0, share=False, vocabularies=None): """Initializes a CategoricalProcessor instance. diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py b/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py index 5709955c49..3ac370a6ab 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py @@ -13,7 +13,11 @@ # limitations under the License. # ============================================================================== -"""Categorical vocabulary classes to map categories to indexes. +"""Categorical vocabulary classes to map categories to indexes (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Can be used for categorical variables, sparse variables and words. """ @@ -25,14 +29,21 @@ from __future__ import print_function import collections import six +from tensorflow.python.util.deprecation import deprecated + class CategoricalVocabulary(object): """Categorical variables vocabulary class. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Accumulates and provides mapping from classes to indexes. Can be easily used for words. 
""" + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, unknown_token="", support_reverse=True): self._unknown_token = unknown_token self._mapping = {unknown_token: 0} diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/text.py b/tensorflow/contrib/learn/python/learn/preprocessing/text.py index 3af2074c2a..f2b6776be7 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/text.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/text.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Implements a number of text preprocessing utilities.""" +"""Implements a number of text preprocessing utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -24,6 +29,7 @@ import numpy as np import six from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated from .categorical_vocabulary import CategoricalVocabulary # pylint: disable=g-bad-import-order @@ -38,6 +44,7 @@ TOKENIZER_RE = re.compile(r"[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+", re.UNICODE) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def tokenizer(iterator): """Tokenizer generator. @@ -51,9 +58,16 @@ def tokenizer(iterator): yield TOKENIZER_RE.findall(value) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') class ByteProcessor(object): - """Maps documents into sequence of ids for bytes.""" + """Maps documents into sequence of ids for bytes. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, max_document_length): self.max_document_length = max_document_length @@ -108,8 +122,14 @@ class ByteProcessor(object): class VocabularyProcessor(object): - """Maps documents to sequences of word ids.""" + """Maps documents to sequences of word ids. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, max_document_length, min_frequency=0, diff --git a/tensorflow/contrib/learn/python/learn/session_run_hook.py b/tensorflow/contrib/learn/python/learn/session_run_hook.py index a8ba2be972..87edc9b720 100644 --- a/tensorflow/contrib/learn/python/learn/session_run_hook.py +++ b/tensorflow/contrib/learn/python/learn/session_run_hook.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""This file is deprecated. Use tensorflow.python.training.session_run_hook.""" +"""This file is deprecated. Use `tensorflow.python.training.session_run_hook`. + +See [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/summary_writer_cache.py b/tensorflow/contrib/learn/python/learn/summary_writer_cache.py index 919d415c30..d663cf5fb7 100644 --- a/tensorflow/contrib/learn/python/learn/summary_writer_cache.py +++ b/tensorflow/contrib/learn/python/learn/summary_writer_cache.py @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Wrapper for a Session-like object that handles threads and recovery. +"""Wrapper for a Session-like object that handles threads and recovery (deprecated). + +These are deprecated aliases for classes and functions in `tf.train`. Please use +those directly. Based on an original design of Illia Polosukhin. """ diff --git a/tensorflow/contrib/learn/python/learn/trainable.py b/tensorflow/contrib/learn/python/learn/trainable.py index 429b6040be..a1a3f20dcd 100644 --- a/tensorflow/contrib/learn/python/learn/trainable.py +++ b/tensorflow/contrib/learn/python/learn/trainable.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""`Trainable` interface.""" +"""`Trainable` interface (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -23,6 +28,8 @@ import abc class Trainable(object): """Interface for objects that are trainable by, e.g., `Experiment`. + + THIS CLASS IS DEPRECATED. """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/utils/__init__.py b/tensorflow/contrib/learn/python/learn/utils/__init__.py index 48978d0ac3..66d8dc6fd4 100644 --- a/tensorflow/contrib/learn/python/learn/utils/__init__.py +++ b/tensorflow/contrib/learn/python/learn/utils/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Learn Utils.""" +"""TensorFlow Learn Utils (deprecated). + +This module and all its submodules are deprecated. 
See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py index cb34cb1d26..3eacac7a3d 100644 --- a/tensorflow/contrib/learn/python/learn/utils/export.py +++ b/tensorflow/contrib/learn/python/learn/utils/export.py @@ -13,14 +13,18 @@ # limitations under the License. # ============================================================================== -"""Export utilities.""" +"""Export utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.framework import deprecated -from tensorflow.python.training import training_util from tensorflow.contrib.session_bundle import exporter from tensorflow.contrib.session_bundle import gc from tensorflow.python.client import session as tf_session @@ -32,6 +36,7 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import saver as tf_saver +from tensorflow.python.training import training_util @deprecated('2017-03-25', 'Please use Estimator.export_savedmodel() instead.') diff --git a/tensorflow/contrib/learn/python/learn/utils/gc.py b/tensorflow/contrib/learn/python/learn/utils/gc.py index 226915987a..916aecbea8 100644 --- a/tensorflow/contrib/learn/python/learn/utils/gc.py +++ b/tensorflow/contrib/learn/python/learn/utils/gc.py @@ -13,7 +13,11 @@ # limitations under the License. 
# ============================================================================== -r"""System for specifying garbage collection (GC) of path based data. +r"""System for specifying garbage collection (GC) of path based data (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This framework allows for GC of data specified by path names, for example files on disk. gc.Path objects each represent a single item stored at a path and may @@ -73,10 +77,12 @@ import os from tensorflow.python.platform import gfile from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated Path = collections.namedtuple('Path', 'path export_version') +@deprecated(None, 'Please implement your own file management or use Saver.') def largest_export_versions(n): """Creates a filter that keeps the largest n export versions. @@ -97,6 +103,7 @@ def largest_export_versions(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def one_of_every_n_export_versions(n): """Creates a filter that keeps one of every n export versions. @@ -128,6 +135,7 @@ def one_of_every_n_export_versions(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def mod_export_version(n): """Creates a filter that keeps every export that is a multiple of n. @@ -146,6 +154,7 @@ def mod_export_version(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def union(lf, rf): """Creates a filter that keeps the union of two filters. @@ -163,6 +172,7 @@ def union(lf, rf): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def negation(f): """Negate a filter. 
@@ -179,6 +189,7 @@ def negation(f): return keep +@deprecated(None, 'Please implement your own file name management.') def get_paths(base_dir, parser): """Gets a list of Paths in a given directory. diff --git a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py index b2521933e5..b92eb9fea8 100644 --- a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities for creating input_fns. +"""Utilities for creating input_fns (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Contents of this file are moved to tensorflow/python/estimator/export.py. InputFnOps is renamed to ServingInputReceiver. @@ -32,13 +36,17 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import parsing_ops +from tensorflow.python.util.deprecation import deprecated class InputFnOps(collections.namedtuple('InputFnOps', ['features', 'labels', 'default_inputs'])): - """A return type for an input_fn. + """A return type for an input_fn (deprecated). + + THIS CLASS IS DEPRECATED. Please use tf.estimator.export.ServingInputReceiver + instead. This return type is currently only supported for serving input_fn. Training and eval input_fn should return a `(features, labels)` tuple. 
@@ -56,6 +64,8 @@ class InputFnOps(collections.namedtuple('InputFnOps', """ +@deprecated(None, 'Please use ' + 'tf.estimator.export.build_parsing_serving_input_receiver_fn.') def build_parsing_serving_input_fn(feature_spec, default_batch_size=None): """Build an input_fn appropriate for serving, expecting fed tf.Examples. @@ -84,6 +94,8 @@ def build_parsing_serving_input_fn(feature_spec, default_batch_size=None): return input_fn +@deprecated(None, 'Please use ' + 'tf.estimator.export.build_raw_serving_input_receiver_fn.') def build_default_serving_input_fn(features, default_batch_size=None): """Build an input_fn appropriate for serving, expecting feature Tensors. diff --git a/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py b/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py index 6a63fb545a..6dbaa15f83 100644 --- a/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py +++ b/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A simple script for inspect checkpoint files.""" +"""A simple script for inspect checkpoint files (deprecated).""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index 1593380007..213619a187 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities supporting export to SavedModel. 
+"""Utilities supporting export to SavedModel (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Some contents of this file are moved to tensorflow/python/estimator/export.py: @@ -52,8 +56,9 @@ from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils from tensorflow.python.summary import summary_iterator from tensorflow.python.training import saver - from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated + # A key for use in the input_alternatives dict indicating the default input. # This is the input that will be expected when a serving request does not @@ -77,6 +82,7 @@ FEATURES_INPUT_ALTERNATIVE_KEY = 'features_input_alternative' _FALLBACK_DEFAULT_OUTPUT_ALTERNATIVE_KEY = 'default_output_alternative' +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def build_standardized_signature_def(input_tensors, output_tensors, problem_type): """Build a SignatureDef using problem type and input and output Tensors. @@ -156,6 +162,7 @@ def _is_regression_problem(problem_type, input_tensors, output_tensors): len(input_tensors) == 1 and len(output_tensors) == 1) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_input_alternatives(input_ops): """Obtain all input alternatives using the input_fn output and heuristics.""" input_alternatives = {} @@ -181,6 +188,7 @@ def get_input_alternatives(input_ops): return input_alternatives, features +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_output_alternatives(model_fn_ops, default_output_alternative_key=None): """Obtain all output alternatives using the model_fn output and heuristics. 
@@ -246,6 +254,7 @@ def get_output_alternatives(model_fn_ops, default_output_alternative_key=None): sorted(output_alternatives.keys()))) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def build_all_signature_defs(input_alternatives, output_alternatives, actual_default_output_alternative_key): """Build `SignatureDef`s from all pairs of input and output alternatives.""" @@ -279,6 +288,7 @@ def build_all_signature_defs(input_alternatives, output_alternatives, MAX_DIRECTORY_CREATION_ATTEMPTS = 10 +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_timestamped_export_dir(export_dir_base): """Builds a path to a new subdirectory within the base directory. @@ -317,6 +327,7 @@ def get_timestamped_export_dir(export_dir_base): '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS)) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_temp_export_dir(timestamped_export_dir): """Builds a directory name based on the argument but starting with 'temp-'. @@ -344,6 +355,7 @@ def _export_version_parser(path): return path._replace(export_version=int(filename)) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_most_recent_export(export_dir_base): """Locate the most recent SavedModel export in a directory of many exports. @@ -363,6 +375,7 @@ def get_most_recent_export(export_dir_base): return next(iter(results or []), None) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def garbage_collect_exports(export_dir_base, exports_to_keep): """Deletes older exports, retaining only a given number of the most recent. 
@@ -387,6 +400,7 @@ def garbage_collect_exports(export_dir_base, exports_to_keep): logging.warn('Can not delete %s recursively: %s', p.path, e) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def make_export_strategy(serving_input_fn, default_output_alternative_key=None, assets_extra=None, @@ -469,6 +483,8 @@ def make_export_strategy(serving_input_fn, return export_strategy.ExportStrategy('Servo', export_fn, strip_default_attrs) +@deprecated(None, + 'Use tf.estimator.export.build_parsing_serving_input_receiver_fn') def make_parsing_export_strategy(feature_columns, default_output_alternative_key=None, assets_extra=None, @@ -555,8 +571,14 @@ def _default_compare_fn(curr_best_eval_result, cand_eval_result): class BestModelSelector(object): - """A helper that keeps track of export selection candidates.""" + """A helper that keeps track of export selection candidates. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def __init__(self, event_file_pattern=None, compare_fn=None): """Constructor of this class. @@ -622,6 +644,7 @@ class BestModelSelector(object): return best_eval_result +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def make_best_model_export_strategy( serving_input_fn, exports_to_keep=1, @@ -707,6 +730,7 @@ def make_best_model_export_strategy( # TODO(b/67013778): Revisit this approach when corresponding changes to # TF Core are finalized. 
+@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def extend_export_strategy(base_export_strategy, post_export_fn, post_export_name=None): diff --git a/tensorflow/python/util/decorator_utils.py b/tensorflow/python/util/decorator_utils.py index df259c7f7c..7b4363c0e4 100644 --- a/tensorflow/python/util/decorator_utils.py +++ b/tensorflow/python/util/decorator_utils.py @@ -82,7 +82,7 @@ def add_notice_to_docstring( lines = _normalize_docstring(doc).splitlines() lines[0] += ' ' + suffix_str - notice = [''] + notice + [instructions] + notice = [''] + notice + ([instructions] if instructions else []) if len(lines) > 1: # Make sure that we keep our distance from the main body -- GitLab From d1ba271902a91a044e7515e248cd9f384a91067b Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 16:24:54 -0800 Subject: [PATCH 110/311] [XLA] In HloEvaluator, fix an issue for HandleAbs to handle complex numbers more correctly: - abs([complex numbers]) would yield floats. However since the specialization for HandleAbs is based on the return type (float), we'd CHECK fail due to float != complex when accessing the elements of the operand (complex). - enable unary_op_test for interpreter.
PiperOrigin-RevId: 187099576 --- .../compiler/xla/service/hlo_evaluator.cc | 32 +++++++++++++++++-- tensorflow/compiler/xla/tests/BUILD | 1 + 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index fd06b19144..cf8b35908f 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -57,6 +57,12 @@ struct is_complex_t : public std::false_type {}; template <> struct is_complex_t : public std::true_type {}; +template +struct is_complex64_t : public std::false_type {}; + +template <> +struct is_complex64_t : public std::true_type {}; + template StatusOr> Compare(const Shape& shape, HloOpcode opcode, const Literal& lhs_literal, @@ -248,17 +254,37 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { template < typename NativeT, - typename std::enable_if::value || - is_complex_t::value>::type* = nullptr> + typename std::enable_if::value>::type* = nullptr> Status HandleAbs(HloInstruction* abs) { TF_ASSIGN_OR_RETURN(parent_->evaluated_[abs], - ElementWiseUnaryOp(abs, [](ElementwiseT elem_operand) { + ElementWiseUnaryOp(abs, [](NativeT elem_operand) { return std::abs(elem_operand); })); return Status::OK(); } + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleAbs(HloInstruction* abs) { + const Literal& operand_literal = + parent_->GetEvaluatedLiteralFor(abs->operand(0)); + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[abs], + (ElementWiseUnaryOpImpl( + abs, [](NativeT elem_operand) { return std::abs(elem_operand); }, + operand_literal))); + + return Status::OK(); + } + Status HandleAbs(HloInstruction* abs) override { + // If the operand is of C64 type, the return type of abs will be F32. + // However, ElementwiseT would still be the return type, F32, and thus + // specifying the ElementwiseT explicitly as C64 is needed below. 
+ if (abs->operand(0)->shape().element_type() == C64) { + return HandleAbs(abs); + } return HandleAbs(abs); } diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 33fde9737d..f3ecfc1604 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -494,6 +494,7 @@ xla_test( xla_test( name = "unary_op_test", srcs = ["unary_op_test.cc"], + tags = ["enable_for_xla_interpreter"], deps = [ "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", -- GitLab From 60ff3890e98f53c1037440d5e535f6f79ad42d7d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Feb 2018 17:01:24 -0800 Subject: [PATCH 111/311] Only link the swapping code when compiling TensorFlow with CUDA support. PiperOrigin-RevId: 187104273 --- tensorflow/core/grappler/optimizers/BUILD | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 908e58bcc7..a52d1c8df2 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -3,6 +3,7 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") filegroup( name = "all_files", @@ -319,8 +320,6 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":gpu_swapping_kernels", - ":gpu_swapping_ops", ":graph_optimizer", ":graph_rewriter", ":static_schedule", @@ -336,7 +335,10 @@ cc_library( "//tensorflow/core/grappler/costs:graph_properties", "//tensorflow/core/grappler/utils:topological_sort", "//tensorflow/core/grappler/utils:traversal", - ], + ] + if_cuda([ + ":gpu_swapping_kernels", + ":gpu_swapping_ops", + ]), ) tf_cc_test_gpu( -- GitLab From 7bcc7ee1a9da4ec55395a935123a46b4ecb2364f Mon 
Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 17:04:09 -0800 Subject: [PATCH 112/311] Consolidate the builtin function overrides into a single module, and use a generic `dynamic_builtin` function to dispatch between implementations. Use the generic dispatcher in the generated code. PiperOrigin-RevId: 187104685 --- .../py2tf/converters/builtin_functions.py | 13 ++++--- tensorflow/contrib/py2tf/utils/BUILD | 12 +----- tensorflow/contrib/py2tf/utils/__init__.py | 4 +- .../py2tf/utils/{printing.py => builtins.py} | 32 +++++++++++++-- .../{printing_test.py => builtins_test.py} | 39 +++++++++++++++---- tensorflow/contrib/py2tf/utils/misc.py | 13 ------- tensorflow/contrib/py2tf/utils/misc_test.py | 27 +------------ 7 files changed, 72 insertions(+), 68 deletions(-) rename tensorflow/contrib/py2tf/utils/{printing.py => builtins.py} (62%) rename tensorflow/contrib/py2tf/utils/{printing_test.py => builtins_test.py} (56%) diff --git a/tensorflow/contrib/py2tf/converters/builtin_functions.py b/tensorflow/contrib/py2tf/converters/builtin_functions.py index e69038aced..b5aa9756da 100644 --- a/tensorflow/contrib/py2tf/converters/builtin_functions.py +++ b/tensorflow/contrib/py2tf/converters/builtin_functions.py @@ -36,23 +36,24 @@ class BuiltinFunctionTransformer(transformer.Base): # pylint:disable=invalid-name - def _convert_len(self, node): + def _convert_builtin(self, node): template = """ - py2tf_utils.dynamic_len(args) + py2tf_utils.dynamic_builtin(func, args) """ - return templates.replace(template, args=node.args)[0].value + return templates.replace(template, func=node.func, args=node.args)[0].value def _convert_print(self, node): template = """ - py2tf_utils.call_print(args) + py2tf_utils.dynamic_print(args) """ return templates.replace(template, args=node.args)[0].value def visit_Call(self, node): self.generic_visit(node) # TODO(mdan): This won't work if the function was hidden. 
- if isinstance(node.func, gast.Name) and node.func.id == 'len': - return self._convert_len(node) + if isinstance(node.func, gast.Name) and node.func.id in ('len',): + return self._convert_builtin(node) + # Print needs to be handled separately because it can be read as statement. if isinstance(node.func, gast.Name) and node.func.id == 'print': return self._convert_print(node) return node diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index c2fdd40707..2086a9ef60 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -20,10 +20,10 @@ py_library( name = "utils", srcs = [ "__init__.py", + "builtins.py", "context_managers.py", "misc.py", "multiple_dispatch.py", - "printing.py", "py_func.py", "tensor_list.py", "type_check.py", @@ -76,16 +76,6 @@ py_test( ], ) -py_test( - name = "printing_test", - srcs = ["printing_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":utils", - "//tensorflow/python:client_testlib", - ], -) - py_test( name = "type_check_test", srcs = ["type_check_test.py"], diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index d931322bf3..19bf2272bc 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -18,11 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.py2tf.utils.builtins import dynamic_builtin +from tensorflow.contrib.py2tf.utils.builtins import dynamic_print from tensorflow.contrib.py2tf.utils.context_managers import control_dependency_on_returns from tensorflow.contrib.py2tf.utils.misc import alias_tensors -from tensorflow.contrib.py2tf.utils.misc import dynamic_len from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_cond from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while -from tensorflow.contrib.py2tf.utils.printing import call_print 
from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func from tensorflow.contrib.py2tf.utils.type_check import is_tensor diff --git a/tensorflow/contrib/py2tf/utils/printing.py b/tensorflow/contrib/py2tf/utils/builtins.py similarity index 62% rename from tensorflow/contrib/py2tf/utils/printing.py rename to tensorflow/contrib/py2tf/utils/builtins.py index 95a62bd80b..0a50b80b60 100644 --- a/tensorflow/contrib/py2tf/utils/printing.py +++ b/tensorflow/contrib/py2tf/utils/builtins.py @@ -12,14 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TensorFlow printing support utilities.""" +"""Builtin conversion utilities.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.py2tf.utils import py_func +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import logging_ops +from tensorflow.python.util import tf_inspect + + +def dynamic_builtin(f, *args, **kwargs): + """Converts a builtin function call inline.""" + if not tf_inspect.isbuiltin(f): + return f(*args, **kwargs) + + if f is len: + return dynamic_len(*args, **kwargs) + + raise NotImplementedError('The "%s" builtin is not yet supported.' % f) + + +def dynamic_len(list_or_tensor): + """Implementation of len using dynamic dispatch.""" + if tensor_util.is_tensor(list_or_tensor): + shape = list_or_tensor.shape + if not shape: + raise ValueError( + 'len requires non-zero rank for tensor "%s"' % list_or_tensor) + return array_ops.shape(list_or_tensor)[0] + + return len(list_or_tensor) def is_tf_print_compatible(value): @@ -30,8 +56,8 @@ def is_tf_print_compatible(value): return False -def call_print(*values): - """Compiled counterpart of the print builtin. 
+def dynamic_print(*values): + """Implementartion of print using dynamic dispatch. The function attempts to use tf.Print if all the values are compatible. Otherwise, it will fall back to py_func. diff --git a/tensorflow/contrib/py2tf/utils/printing_test.py b/tensorflow/contrib/py2tf/utils/builtins_test.py similarity index 56% rename from tensorflow/contrib/py2tf/utils/printing_test.py rename to tensorflow/contrib/py2tf/utils/builtins_test.py index 2070deb304..19a72c63ec 100644 --- a/tensorflow/contrib/py2tf/utils/printing_test.py +++ b/tensorflow/contrib/py2tf/utils/builtins_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for printing module.""" +"""Tests for builtins module.""" from __future__ import absolute_import from __future__ import division @@ -22,28 +22,53 @@ import sys import six -from tensorflow.contrib.py2tf.utils import printing +from tensorflow.contrib.py2tf.utils import builtins +from tensorflow.python.framework import constant_op from tensorflow.python.platform import test -class ContextManagersTest(test.TestCase): +class BuiltinsTest(test.TestCase): - def test_call_print_tf(self): + def test_dynamic_len_tf_scalar(self): + a = constant_op.constant(1) + + with self.assertRaises(ValueError): + with self.test_session() as sess: + sess.run(builtins.dynamic_builtin(len, a)) + + def test_dynamic_len_tf_array(self): + a = constant_op.constant([1, 2, 3]) + + with self.test_session() as sess: + self.assertEqual(3, sess.run(builtins.dynamic_builtin(len, a))) + + def test_dynamic_len_tf_matrix(self): + a = constant_op.constant([[1, 2], [3, 4]]) + + with self.test_session() as sess: + self.assertEqual(2, sess.run(builtins.dynamic_builtin(len, a))) + + def test_dynamic_len_py_list(self): + a = [3] * 5 + + self.assertEqual(5, builtins.dynamic_builtin(len, a)) + + def 
test_dynamic_print_tf(self): try: out_capturer = six.StringIO() sys.stdout = out_capturer with self.test_session() as sess: - sess.run(printing.call_print('test message', 1)) + sess.run(builtins.dynamic_print('test message', 1)) self.assertEqual(out_capturer.getvalue(), 'test message 1\n') finally: sys.stdout = sys.__stdout__ - def test_call_print_py_func(self): + def test_dynamic_print_complex(self): try: out_capturer = six.StringIO() sys.stdout = out_capturer with self.test_session() as sess: - sess.run(printing.call_print('test message', [1, 2])) + sess.run(builtins.dynamic_print('test message', [1, 2])) self.assertEqual(out_capturer.getvalue(), 'test message [1, 2]\n') finally: sys.stdout = sys.__stdout__ diff --git a/tensorflow/contrib/py2tf/utils/misc.py b/tensorflow/contrib/py2tf/utils/misc.py index 7548048388..1b06caf0bd 100644 --- a/tensorflow/contrib/py2tf/utils/misc.py +++ b/tensorflow/contrib/py2tf/utils/misc.py @@ -19,22 +19,9 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops -def dynamic_len(list_or_tensor): - """Implementation of len using dynamic dispatch.""" - if tensor_util.is_tensor(list_or_tensor): - shape = list_or_tensor.shape - if not shape: - raise ValueError( - 'len requires non-zero rank for tensor "%s"' % list_or_tensor) - return array_ops.shape(list_or_tensor)[0] - - return len(list_or_tensor) - - def alias_tensors(*args): """Wrap any Tensor arguments with an identity op. 
diff --git a/tensorflow/contrib/py2tf/utils/misc_test.py b/tensorflow/contrib/py2tf/utils/misc_test.py index ec88e7cb74..8aedd4cd64 100644 --- a/tensorflow/contrib/py2tf/utils/misc_test.py +++ b/tensorflow/contrib/py2tf/utils/misc_test.py @@ -19,37 +19,12 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.py2tf.utils.misc import alias_tensors -from tensorflow.contrib.py2tf.utils.misc import dynamic_len from tensorflow.python.framework.constant_op import constant from tensorflow.python.ops.variables import Variable from tensorflow.python.platform import test -class ContextManagersTest(test.TestCase): - - def test_dynamic_len_tf_scalar(self): - a = constant(1) - - with self.assertRaises(ValueError): - with self.test_session() as sess: - sess.run(dynamic_len(a)) - - def test_dynamic_len_tf_array(self): - a = constant([1, 2, 3]) - - with self.test_session() as sess: - self.assertEqual(3, sess.run(dynamic_len(a))) - - def test_dynamic_len_tf_matrix(self): - a = constant([[1, 2], [3, 4]]) - - with self.test_session() as sess: - self.assertEqual(2, sess.run(dynamic_len(a))) - - def test_dynamic_len_py_list(self): - a = [3] * 5 - - self.assertEqual(5, dynamic_len(a)) +class MiscTest(test.TestCase): def test_alias_single_tensor(self): a = constant(1) -- GitLab From cb0984df5549c077621049416f69b914635208ce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 17:27:20 -0800 Subject: [PATCH 113/311] Fix buffer assignment for conditional instruction. 
PiperOrigin-RevId: 187107432 --- .../compiler/xla/service/buffer_assignment.cc | 358 +++++++++--------- .../compiler/xla/service/copy_insertion.cc | 72 +++- 2 files changed, 241 insertions(+), 189 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index b1e693da9d..d44d3d71d9 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -48,6 +48,183 @@ using ::tensorflow::strings::HumanReadableNumBytes; using ::tensorflow::strings::Printf; using ::tensorflow::strings::StrAppend; +namespace { + +template +string ColocatedBufferSetsToString(const T& container, const char* title) { + string result; + StrAppend(&result, title, "\n"); + for (const auto& it : container) { + StrAppend(&result, "\t", it->ToString(), "\n"); + } + return result; +} + +// Walk the call graph of the HLO module and place each computation into either +// thread_local_computations or global_computations depending upon whether the +// computation requires thread-local allocations or global allocations. The +// elements in thread_local_computations and global_computations are in post +// order (if computation A has an instruction which calls computation B, then A +// will appear after B in the vector). +Status GatherComputationsByAllocationType( + const HloModule* module, + std::vector* thread_local_computations, + std::vector* global_computations) { + // Create a worklist of computations paired with whether the allocation must + // be thread-local. + std::deque> worklist; + worklist.push_back(std::make_pair(module->entry_computation(), + /*is_thread_local*/ false)); + + // Sets for quickly checking membership. Computations are returned in vectors + // for stable iteration. 
+ FlatSet thread_local_set; + FlatSet global_set; + + while (!worklist.empty()) { + auto worklist_front = worklist.front(); + worklist.pop_front(); + const HloComputation* computation = worklist_front.first; + bool is_thread_local = worklist_front.second; + bool in_thread_local_set = thread_local_set.count(computation) > 0; + bool in_global_set = global_set.count(computation) > 0; + + // If the computation has already been added to the respective set, then + // nothing to do. + if ((is_thread_local && in_thread_local_set) || + (!is_thread_local && in_global_set)) { + continue; + } + + // If the computation has already been added to the other set this is an + // error condition because the global call to the computation (eg, + // while/call) may return a reference to one of the thread-local buffers to + // the calling computation which will become a dangling reference when the + // thread-local is deallocated with the call return. + if ((is_thread_local && in_global_set) || + (!is_thread_local && in_thread_local_set)) { + return InvalidArgument( + "computation %s has conflicting allocation requirements (global " + "and thread-local)", + computation->name().c_str()); + } + + if (is_thread_local) { + thread_local_set.insert(computation); + } else { + global_set.insert(computation); + } + + for (auto* instruction : computation->instructions()) { + for (HloComputation* subcomputation : + instruction->called_computations()) { + switch (instruction->opcode()) { + case HloOpcode::kCall: + case HloOpcode::kConditional: + case HloOpcode::kWhile: + // Call and while must be called from a computation with global + // allocations as they may return references to buffers inside the + // called computation which cannot be thread-local. 
+ if (is_thread_local) { + return InvalidArgument( + "computation %s cannot contain call/while op because it " + "requires thread-local buffer allocations", + computation->name().c_str()); + } + worklist.push_back(std::make_pair(subcomputation, + false)); // Not thread local. + break; + case HloOpcode::kMap: + case HloOpcode::kReduce: + case HloOpcode::kReduceWindow: + case HloOpcode::kSelectAndScatter: + case HloOpcode::kFusion: + // Map/reduce etc computations are always thread-local. + worklist.push_back(std::make_pair(subcomputation, + true)); // Thread local. + break; + default: + return InternalError( + "Unexpected calling opcode: %s", + HloOpcodeString(instruction->opcode()).c_str()); + } + } + } + } + + // Add the computations to the vectors in post order. + for (auto* computation : module->MakeComputationPostOrder()) { + if (thread_local_set.count(computation) > 0) { + thread_local_computations->push_back(computation); + } else if (global_set.count(computation) > 0) { + global_computations->push_back(computation); + } + // If the computation is not reachable from the entry computation, then it + // will not appear in either thread_local_set or global_set. We don't bother + // assigning buffers for these. + } + return Status::OK(); +} + +// Checks that points-to set of 'instruction' is unambiguous and distinct +// (ensured by CopyInsertion), then adds the buffer from the points-to set at +// 'index' to 'colocated_set'. +const LogicalBuffer* AddBufferToColocatedSet( + const HloInstruction* instruction, const ShapeIndex& index, + const TuplePointsToAnalysis& points_to_analysis, + std::vector* colocated_set) { + // CopyInsertion ensures root points-to set is unambiguous and distinct. 
+ const auto& points_to = points_to_analysis.GetPointsToSet(instruction); + DCHECK(!points_to.IsAmbiguous()); + colocated_set->push_back(points_to.element(index)[0]); + return colocated_set->back(); +} + +// Given the interference map of a graph (the list of interfering node indices +// for each node), perform graph coloring such that interfering nodes are +// assigned to different colors. Returns the assigned color of the nodes, where +// the colors are represented as integer values [0, color_count). +std::vector ColorInterferenceGraph( + const std::vector>& interference_map) { + const int64 node_count = interference_map.size(); + + // Sort the nodes such that we assign nodes with more interference first. This + // relies on the common heuristic of assigning the most constrained node + // first, but it would be good to investigate other ordering heuristics too. + std::vector nodes(node_count); + std::iota(nodes.begin(), nodes.end(), 0); + std::sort(nodes.begin(), nodes.end(), + [&interference_map](const int64 i, const int64 j) { + return interference_map[i].size() > interference_map[j].size(); + }); + + const int64 kColorUnassigned = -1; + std::vector assigned_colors(node_count, kColorUnassigned); + for (int64 node : nodes) { + // Mark the colors that are already assigned to the neighbors. + std::vector available_colors(node_count, true); + for (int64 neighbor : interference_map[node]) { + int64 color = assigned_colors[neighbor]; + if (color != kColorUnassigned) { + available_colors[color] = false; + } + } + + // Find the color that is not yet assigned to the neighbors. 
+ int64 color = kColorUnassigned; + for (color = 0; color < available_colors.size(); ++color) { + if (available_colors[color]) { + break; + } + } + CHECK_NE(color, kColorUnassigned); + assigned_colors[node] = color; + } + return assigned_colors; +} + +} // namespace + size_t BufferAllocation::Slice::Hasher::operator()(Slice s) const { uint64 h = std::hash()(s.index()); h = tensorflow::Hash64Combine(h, std::hash()(s.offset())); @@ -523,116 +700,6 @@ BufferAssignmentProto BufferAssignment::ToProto() const { return proto; } -namespace { - -// Walk the call graph of the HLO module and place each computation into either -// thread_local_computations or global_computations depending upon whether the -// computation requires thread-local allocations or global allocations. The -// elements in thread_local_computations and global_computations are in post -// order (if computation A has an instruction which calls computation B, then A -// will appear after B in the vector). -Status GatherComputationsByAllocationType( - const HloModule* module, - std::vector* thread_local_computations, - std::vector* global_computations) { - // Create a worklist of computations paired with whether the allocation must - // be thread-local. - std::deque> worklist; - worklist.push_back(std::make_pair(module->entry_computation(), - /*is_thread_local*/ false)); - - // Sets for quickly checking membership. Computations are returned in vectors - // for stable iteration. - FlatSet thread_local_set; - FlatSet global_set; - - while (!worklist.empty()) { - auto worklist_front = worklist.front(); - worklist.pop_front(); - const HloComputation* computation = worklist_front.first; - bool is_thread_local = worklist_front.second; - bool in_thread_local_set = thread_local_set.count(computation) > 0; - bool in_global_set = global_set.count(computation) > 0; - - // If the computation has already been added to the respective set, then - // nothing to do. 
- if ((is_thread_local && in_thread_local_set) || - (!is_thread_local && in_global_set)) { - continue; - } - - // If the computation has already been added to the other set this is an - // error condition because the global call to the computation (eg, - // while/call) may return a reference to one of the thread-local buffers to - // the calling computation which will become a dangling reference when the - // thread-local is deallocated with the call return. - if ((is_thread_local && in_global_set) || - (!is_thread_local && in_thread_local_set)) { - return InvalidArgument( - "computation %s has conflicting allocation requirements (global " - "and thread-local)", - computation->name().c_str()); - } - - if (is_thread_local) { - thread_local_set.insert(computation); - } else { - global_set.insert(computation); - } - - for (auto* instruction : computation->instructions()) { - for (HloComputation* subcomputation : - instruction->called_computations()) { - switch (instruction->opcode()) { - case HloOpcode::kCall: - case HloOpcode::kConditional: - case HloOpcode::kWhile: - // Call and while must be called from a computation with global - // allocations as they may return references to buffers inside the - // called computation which cannot be thread-local. - if (is_thread_local) { - return InvalidArgument( - "computation %s cannot contain call/while op because it " - "requires thread-local buffer allocations", - computation->name().c_str()); - } - worklist.push_back(std::make_pair(subcomputation, - false)); // Not thread local. - break; - case HloOpcode::kMap: - case HloOpcode::kReduce: - case HloOpcode::kReduceWindow: - case HloOpcode::kSelectAndScatter: - case HloOpcode::kFusion: - // Map/reduce etc computations are always thread-local. - worklist.push_back(std::make_pair(subcomputation, - true)); // Thread local. 
- break; - default: - return InternalError( - "Unexpected calling opcode: %s", - HloOpcodeString(instruction->opcode()).c_str()); - } - } - } - } - - // Add the computations to the vectors in post order. - for (auto* computation : module->MakeComputationPostOrder()) { - if (thread_local_set.count(computation) > 0) { - thread_local_computations->push_back(computation); - } else if (global_set.count(computation) > 0) { - global_computations->push_back(computation); - } - // If the computation is not reachable from the entry computation, then it - // will not appear in either thread_local_set or global_set. We don't bother - // assigning buffers for these. - } - return Status::OK(); -} - -} // namespace - /* static */ StatusOr> BufferAssigner::Run( const HloModule* module, std::unique_ptr hlo_ordering, @@ -1085,7 +1152,8 @@ void BufferAssigner::AddSetToColocatedBufferSets( if (colocated_set.empty()) { return; } - + VLOG(5) << ColocatedBufferSetsToString(colocated_set, + "Adding colocated buffer set"); // Find existing sets that overlap with at least one buffer from the // colocated_set. The resulting 'overlap_set_indices' will have at most // colocated_buffer_sets->size() entries, and will be in increasing order. 
@@ -1093,6 +1161,10 @@ void BufferAssigner::AddSetToColocatedBufferSets( for (size_t index = 0; index < colocated_buffer_sets->size(); ++index) { for (const LogicalBuffer* buffer : colocated_set) { if ((*colocated_buffer_sets)[index].count(buffer) > 0) { + VLOG(5) << "Found overlap with existing set on buffer " + << buffer->ToString() << "\n" + << ColocatedBufferSetsToString((*colocated_buffer_sets)[index], + "Overlapping set"); overlap_set_indices.push_back(index); break; } @@ -1104,6 +1176,7 @@ void BufferAssigner::AddSetToColocatedBufferSets( colocated_buffer_sets->emplace_back(); colocated_buffer_sets->back().insert(colocated_set.begin(), colocated_set.end()); + VLOG(5) << "No overlap found, new group created"; return; } @@ -1115,6 +1188,8 @@ void BufferAssigner::AddSetToColocatedBufferSets( first->insert(overlap_set.begin(), overlap_set.end()); } first->insert(colocated_set.begin(), colocated_set.end()); + VLOG(5) << ColocatedBufferSetsToString( + *first, "Result of the colocated buffer set merging"); // Remove overlap sets that we just merged. The offset accounts for the fact // that as elements are erased, the indices need to be adjusted. Keep in mind @@ -1125,67 +1200,6 @@ void BufferAssigner::AddSetToColocatedBufferSets( } } -namespace { - -// Checks that points-to set of 'instruction' is unambiguous and distinct -// (ensured by CopyInsertion), then adds the buffer from the points-to set at -// 'index' to 'colocated_set'. -const LogicalBuffer* AddBufferToColocatedSet( - const HloInstruction* instruction, const ShapeIndex& index, - const TuplePointsToAnalysis& points_to_analysis, - std::vector* colocated_set) { - // CopyInsertion ensures root points-to set is unambiguous and distinct. 
- const auto& points_to = points_to_analysis.GetPointsToSet(instruction); - DCHECK(!points_to.IsAmbiguous()); - colocated_set->push_back(points_to.element(index)[0]); - return colocated_set->back(); -} - -// Given the interference map of a graph (the list of interfering node indices -// for each node), perform graph coloring such that interfering nodes are -// assigned to different colors. Returns the assigned color of the nodes, where -// the colors are represented as integer values [0, color_count). -std::vector ColorInterferenceGraph( - const std::vector>& interference_map) { - const int64 node_count = interference_map.size(); - - // Sort the nodes such that we assign nodes with more interference first. This - // relies on the common heuristic of assigning the most constrained node - // first, but it would be good to investigate other ordering heuristics too. - std::vector nodes(node_count); - std::iota(nodes.begin(), nodes.end(), 0); - std::sort(nodes.begin(), nodes.end(), - [&interference_map](const int64 i, const int64 j) { - return interference_map[i].size() > interference_map[j].size(); - }); - - const int64 kColorUnassigned = -1; - std::vector assigned_colors(node_count, kColorUnassigned); - for (int64 node : nodes) { - // Mark the colors that are already assigned to the neighbors. - std::vector available_colors(node_count, true); - for (int64 neighbor : interference_map[node]) { - int64 color = assigned_colors[neighbor]; - if (color != kColorUnassigned) { - available_colors[color] = false; - } - } - - // Find the color that is not yet assigned to the neighbors. 
- int64 color = kColorUnassigned; - for (color = 0; color < available_colors.size(); ++color) { - if (available_colors[color]) { - break; - } - } - CHECK_NE(color, kColorUnassigned); - assigned_colors[node] = color; - } - return assigned_colors; -} - -} // namespace - std::vector BufferAssigner::MergeColocatedBufferSets( const std::vector& colocated_buffer_sets, diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index cc195879a6..df73c28597 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -58,6 +58,45 @@ bool ValueIsReadOnly(const HloValue& value) { return IsConstantValue(value) || IsEntryParameterValue(value); } +// Data structure describing the action which should be taken on parts of a +// computation buffers, with respect to the adding of special case copies. +struct SpecialCaseCopyPolicy { + // Insert a copy if the same buffer is found at multiple indices within the + // output tuple. + bool copy_root_replicated_buffers = false; + // If true, insert a copy if a buffer coming from a constant or a parameter + // is found wihtin the output tuple. + bool copy_parameters_and_constants = false; +}; + +SpecialCaseCopyPolicy GetSpecialCaseCopyPolicy(const CallGraphNode& node, + HloModule* module, + HloComputation* computation) { + SpecialCaseCopyPolicy policy; + if (computation == module->entry_computation()) { + policy.copy_parameters_and_constants = true; + policy.copy_root_replicated_buffers = true; + } + for (const CallSite& site : node.caller_callsites()) { + // The kWhile instruction does not have an handling here, as the + // AddCopiesForWhile() API takes care of adding its own copies. 
+ if (site.instruction()->opcode() == HloOpcode::kConditional) { + policy.copy_parameters_and_constants = true; + policy.copy_root_replicated_buffers = true; + } + } + return policy; +} + +bool ShouldCopyRootValue(const HloValue& value, + const SpecialCaseCopyPolicy& policy) { + if (policy.copy_parameters_and_constants) { + return IsConstantValue(value) || + value.defining_instruction()->opcode() == HloOpcode::kParameter; + } + return false; +} + // Deep copy the given instructions 'from' and 'to' at the ShapeIndexes given in // 'indices_to_copy'. Add control edges from the respective kCopy instructions // in deep copy of 'from' to the respective kCopy instruction in the deep copy @@ -957,7 +996,8 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { } TF_RET_CHECK(node.context() == CallContext::kSequential); - const bool is_entry = computation == module->entry_computation(); + SpecialCaseCopyPolicy policy = + GetSpecialCaseCopyPolicy(node, module, computation); HloInstruction* root = computation->root_instruction(); // Mark nondistinct/ambiguous indices. @@ -970,27 +1010,26 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { for (const HloBuffer* buffer : buffers_at_index) { buffer_seen_before |= !seen.insert(buffer).second; } - if (buffers_at_index.size() > 1 || (buffer_seen_before && is_entry)) { - VLOG(2) << "Index " << index << " of root of computation " + if (buffers_at_index.size() > 1 || + (buffer_seen_before && policy.copy_root_replicated_buffers)) { + VLOG(2) << "Index " << index << " of computation " << computation->name() << " (" << root->name() << ") has ambiguous or non-distinct buffer. Copying."; add_index_to_copy(root, index); } }); - // For entry instructions, mark any parameter or constant values. 
- if (is_entry) { - for (const auto& pair : - alias_analysis->dataflow_analysis().GetInstructionValueSet(root)) { - const ShapeIndex& index = pair.first; - const HloValueSet& value_set = pair.second; - for (const HloValue* value : value_set.values()) { - if (ValueIsReadOnly(*value)) { - VLOG(2) << "Root of entry computation (" << root->name() - << ") has constant or entry parameter value at index " - << index << ". Copying."; - add_index_to_copy(root, index); - } + for (const auto& pair : + alias_analysis->dataflow_analysis().GetInstructionValueSet(root)) { + const ShapeIndex& index = pair.first; + const HloValueSet& value_set = pair.second; + for (const HloValue* value : value_set.values()) { + if (ShouldCopyRootValue(*value, policy)) { + VLOG(2) << "Root of (" << root->name() << ") of computation(" + << computation->name() + << ") has constant or parameter value at index " << index + << ". Copying."; + add_index_to_copy(root, index); } } } @@ -1012,7 +1051,6 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { instruction->parent()->set_root_instruction(deep_copy); } } - return Status::OK(); } -- GitLab From ef7c481b0aa563ab8a3bf387e97121382cbaa588 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 17:55:31 -0800 Subject: [PATCH 114/311] [XLA::Interpreter] Add support for kConditional to HloEvaluator. Also enable xla/tests/conditional_tests to run on interpreter. 
PiperOrigin-RevId: 187110438 --- .../compiler/xla/service/hlo_evaluator.cc | 28 +++++++++++++++++++ .../compiler/xla/service/hlo_evaluator.h | 2 ++ tensorflow/compiler/xla/tests/BUILD | 1 + 3 files changed, 31 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index cf8b35908f..afbfdac05e 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -2491,6 +2491,34 @@ Status HloEvaluator::HandleCall(HloInstruction* call) { return Status::OK(); } +Status HloEvaluator::HandleConditional(HloInstruction* conditional) { + const auto& pred = GetEvaluatedLiteralFor(conditional->operand(0)); + const auto& true_computation_arg = + GetEvaluatedLiteralFor(conditional->operand(1)); + const auto& false_computation_arg = + GetEvaluatedLiteralFor(conditional->operand(2)); + + auto* true_computation = conditional->true_computation(); + auto* false_computation = conditional->false_computation(); + + auto result = Literal::CreateFromShape(conditional->shape()); + HloEvaluator embedded_evaluator; + if (pred.Get({})) { + result = embedded_evaluator + .Evaluate(*true_computation, + {&true_computation_arg}) + .ConsumeValueOrDie(); + } else { + result = embedded_evaluator + .Evaluate(*false_computation, + {&false_computation_arg}) + .ConsumeValueOrDie(); + } + + evaluated_[conditional] = std::move(result); + return Status::OK(); +} + Status HloEvaluator::Preprocess(HloInstruction* hlo) { VLOG(2) << "About to visit HLO: " << hlo->ToString(); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index c65d9915e3..fc82011630 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -153,6 +153,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCopy(HloInstruction* copy) override; + Status 
HandleConditional(HloInstruction* conditional) override; + Status HandleCall(HloInstruction* call) override; private: diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index f3ecfc1604..19b3dfae4e 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -478,6 +478,7 @@ xla_test( xla_test( name = "conditional_test", srcs = ["conditional_test.cc"], + tags = ["enable_for_xla_interpreter"], deps = [ "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", -- GitLab From d888a77dc31bb45dfd0416fa9202c83206f2d07e Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 17:56:15 -0800 Subject: [PATCH 115/311] Support configurable stats publishers in the grpc server. PiperOrigin-RevId: 187110497 --- .../distributed_runtime/rpc/grpc_server_lib.cc | 15 ++++++++++++--- .../distributed_runtime/rpc/grpc_server_lib.h | 6 ++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc index c4ac92d809..a6f4be3eaf 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc @@ -106,7 +106,8 @@ GrpcServer::~GrpcServer() { Status GrpcServer::Init( ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func, - const WorkerCreationFunction& worker_func) { + const WorkerCreationFunction& worker_func, + const StatsPublisherFactory& stats_factory) { mutex_lock l(mu_); CHECK_EQ(state_, NEW); master_env_.env = env_; @@ -218,7 +219,7 @@ Status GrpcServer::Init( master_env_.ops = OpRegistry::Global(); master_env_.worker_cache = worker_cache; master_env_.master_session_factory = - [config]( + [config, stats_factory]( SessionOptions options, const MasterEnv* env, std::unique_ptr>> remote_devs, std::unique_ptr worker_cache, @@ -226,7 +227,7 
@@ Status GrpcServer::Init( options.config.MergeFrom(config); return new MasterSession(options, env, std::move(remote_devs), std::move(worker_cache), std::move(device_set), - CreateNoOpStatsPublisher); + stats_factory); }; master_env_.worker_cache_factory = [this](const WorkerCacheFactoryOptions& options, @@ -241,6 +242,14 @@ Status GrpcServer::Init( return Status::OK(); } +Status GrpcServer::Init( + ServiceInitFunction service_func, + const RendezvousMgrCreationFunction& rendezvous_mgr_func, + const WorkerCreationFunction& worker_func) { + return Init(std::move(service_func), rendezvous_mgr_func, worker_func, + CreateNoOpStatsPublisher); +} + Status GrpcServer::Init( ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func) { diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h index 8b12ac1461..7c2f06f618 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h @@ -22,6 +22,7 @@ limitations under the License. 
#include "grpc++/security/credentials.h" #include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/common_runtime/stats_publisher_interface.h" #include "tensorflow/core/distributed_runtime/master_env.h" #include "tensorflow/core/distributed_runtime/rpc/async_service_interface.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_channel.h" @@ -68,6 +69,11 @@ class GrpcServer : public ServerInterface { const string target() const override; protected: + Status Init(ServiceInitFunction service_func, + const RendezvousMgrCreationFunction& rendezvous_mgr_func, + const WorkerCreationFunction& worker_func, + const StatsPublisherFactory& stats_factory); + Status Init(ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func, const WorkerCreationFunction& worker_func); -- GitLab From 7a2ba8edbaa6491ff33ae1412d9ba45e80c2cc3c Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 26 Feb 2018 18:04:55 -0800 Subject: [PATCH 116/311] Modify retrain script to output TFLite compatible quantized models. -Also fix flaky input name selection introduced by last PR. -Also rely on tf.contrib.quantize to do graph transformations. -Also, update retrain script to use new float mobilenet_v1 and quantized mobilenet_v1 models. 
PiperOrigin-RevId: 187111533 --- .../examples/image_retraining/retrain.py | 317 +++++++++++------- .../examples/image_retraining/retrain_test.py | 44 ++- 2 files changed, 229 insertions(+), 132 deletions(-) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index 25e09fecbf..99a71206ac 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -75,13 +75,16 @@ python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos --architecture mobilenet_1.0_224 ``` -Run quantized version of mobilenet: +Run mobilenet, instrumented for quantization: ```bash python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quantized + --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quant ``` +These instrumented models can be converted to fully quantized mobile models via +TensorFlow Lite. + There are 32 different Mobilenet models to choose from, with a variety of file size and latency options. The first number can be '1.0', '0.75', '0.50', or '0.25' to control the size, and the second controls the input image size, either @@ -121,7 +124,6 @@ import numpy as np from six.moves import urllib import tensorflow as tf -from tensorflow.contrib.quantize.python import quant_ops from tensorflow.python.framework import graph_util from tensorflow.python.framework import tensor_shape from tensorflow.python.platform import gfile @@ -135,6 +137,9 @@ FLAGS = None # need to update these to reflect the values in the network you're using. MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1 # ~134M +# The location where variable checkpoints will be stored. +CHECKPOINT_NAME = '/tmp/_retrain_checkpoint' + def create_image_lists(image_dir, testing_percentage, validation_percentage): """Builds a list of training images from the file system. 
@@ -745,9 +750,9 @@ def variable_summaries(var): tf.summary.histogram('histogram', var) -def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, - bottleneck_tensor_size, quantize_layer): - """Adds a new softmax and fully-connected layer for training. +def add_final_retrain_ops(class_count, final_tensor_name, bottleneck_tensor, + bottleneck_tensor_size, quantize_layer, is_training): + """Adds a new softmax and fully-connected layer for training and eval. We need to retrain the top layer to identify our new classes, so this function adds the right operations to the graph, along with some variables to hold the @@ -763,7 +768,9 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, bottleneck_tensor: The output of the main CNN graph. bottleneck_tensor_size: How many entries in the bottleneck vector. quantize_layer: Boolean, specifying whether the newly added layer should be - quantized. + instrumented for quantization. + is_training: Boolean, specifying whether the newly added layer is for training + or eval. Returns: The tensors for the training and cross entropy results, and tensors for the @@ -778,50 +785,41 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, ground_truth_input = tf.placeholder( tf.int64, [None], name='GroundTruthInput') - # Organizing the following ops as `final_training_ops` so they're easier - # to see in TensorBoard - layer_name = 'final_training_ops' + # Organizing the following ops so they are easier to see in TensorBoard. 
+ layer_name = 'final_retrain_ops' with tf.name_scope(layer_name): with tf.name_scope('weights'): initial_value = tf.truncated_normal( [bottleneck_tensor_size, class_count], stddev=0.001) layer_weights = tf.Variable(initial_value, name='final_weights') - if quantize_layer: - quantized_layer_weights = quant_ops.MovingAvgQuantize( - layer_weights, is_training=True) - variable_summaries(quantized_layer_weights) - variable_summaries(layer_weights) + with tf.name_scope('biases'): layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases') - if quantize_layer: - quantized_layer_biases = quant_ops.MovingAvgQuantize( - layer_biases, is_training=True) - variable_summaries(quantized_layer_biases) - variable_summaries(layer_biases) with tf.name_scope('Wx_plus_b'): - if quantize_layer: - logits = tf.matmul(bottleneck_input, - quantized_layer_weights) + quantized_layer_biases - logits = quant_ops.MovingAvgQuantize( - logits, - init_min=-32.0, - init_max=32.0, - is_training=True, - num_bits=8, - narrow_range=False, - ema_decay=0.5) - tf.summary.histogram('pre_activations', logits) - else: - logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases - tf.summary.histogram('pre_activations', logits) + logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases + tf.summary.histogram('pre_activations', logits) final_tensor = tf.nn.softmax(logits, name=final_tensor_name) + # The tf.contrib.quantize functions rewrite the graph in place for + # quantization. The imported model graph has already been rewritten, so upon + # calling these rewrites, only the newly added final layer will be + # transformed. + if quantize_layer: + if is_training: + tf.contrib.quantize.create_training_graph() + else: + tf.contrib.quantize.create_eval_graph() + tf.summary.histogram('activations', final_tensor) + # If this is an eval graph, we don't need to add loss ops or an optimizer. 
+ if not is_training: + return None, None, bottleneck_input, ground_truth_input, final_tensor + with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy( labels=ground_truth_input, logits=logits) @@ -857,13 +855,91 @@ def add_evaluation_step(result_tensor, ground_truth_tensor): return evaluation_step, prediction -def save_graph_to_file(sess, graph, graph_file_name): +def run_final_eval(sess, model_info, class_count, image_lists, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor): + """Runs a final evaluation on an eval graph using the test data set. + + Args: + sess: Session for the train graph. + model_info: Model info dictionary from create_model_info() + class_count: Number of classes + image_lists: Dictionary of training images for each label. + jpeg_data_tensor: The layer to feed jpeg image data into. + decoded_image_tensor: The output of decoding and resizing the image. + resized_image_tensor: The input node of the recognition graph. + bottleneck_tensor: The bottleneck output layer of the CNN graph. 
+ """ + (sess, bottleneck_input, ground_truth_input, evaluation_step, + prediction) = build_eval_session(model_info, class_count) + + test_bottlenecks, test_ground_truth, test_filenames = ( + get_random_cached_bottlenecks(sess, image_lists, FLAGS.test_batch_size, + 'testing', FLAGS.bottleneck_dir, + FLAGS.image_dir, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor, FLAGS.architecture)) + test_accuracy, predictions = sess.run( + [evaluation_step, prediction], + feed_dict={ + bottleneck_input: test_bottlenecks, + ground_truth_input: test_ground_truth + }) + tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % + (test_accuracy * 100, len(test_bottlenecks))) + + if FLAGS.print_misclassified_test_images: + tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') + for i, test_filename in enumerate(test_filenames): + if predictions[i] != test_ground_truth[i]: + tf.logging.info('%70s %s' % (test_filename, + list(image_lists.keys())[predictions[i]])) + + +def build_eval_session(model_info, class_count): + """Builds a restored eval session without train operations for exporting. + + Args: + model_info: Model info dictionary from create_model_info() + class_count: Number of classes + + Returns: + Eval session containing the restored eval graph. + The bottleneck input, ground truth, eval step, and prediction tensors. + """ + # If quantized, we need to create the correct eval graph for exporting. + eval_graph, bottleneck_tensor, _ = create_model_graph(model_info) + + eval_sess = tf.Session(graph=eval_graph) + with eval_graph.as_default(): + # Add the new layer for exporting. + (_, _, bottleneck_input, + ground_truth_input, final_tensor) = add_final_retrain_ops( + class_count, FLAGS.final_tensor_name, bottleneck_tensor, + model_info['bottleneck_tensor_size'], model_info['quantize_layer'], + False) + + # Now we need to restore the values from the training graph to the eval + # graph. 
+ tf.train.Saver().restore(eval_sess, CHECKPOINT_NAME) + + evaluation_step, prediction = add_evaluation_step(final_tensor, + ground_truth_input) + + return (eval_sess, bottleneck_input, ground_truth_input, evaluation_step, + prediction) + + +def save_graph_to_file(graph, graph_file_name, model_info, class_count): + """Saves a graph to file, creating a valid quantized one if necessary.""" + sess, _, _, _, _ = build_eval_session(model_info, class_count) + graph = sess.graph + output_graph_def = graph_util.convert_variables_to_constants( sess, graph.as_graph_def(), [FLAGS.final_tensor_name]) with gfile.FastGFile(graph_file_name, 'wb') as f: f.write(output_graph_def.SerializeToString()) - return def prepare_file_system(): @@ -916,11 +992,10 @@ def create_model_info(architecture): return None version_string = parts[1] if (version_string != '1.0' and version_string != '0.75' and - version_string != '0.50' and version_string != '0.25'): + version_string != '0.5' and version_string != '0.25'): tf.logging.error( - """"The Mobilenet version should be '1.0', '0.75', '0.50', or '0.25', - but found '%s' for architecture '%s'""", - version_string, architecture) + """"The Mobilenet version should be '1.0', '0.75', '0.5', or '0.25', + but found '%s' for architecture '%s'""", version_string, architecture) return None size_string = parts[2] if (size_string != '224' and size_string != '192' and @@ -933,35 +1008,26 @@ def create_model_info(architecture): if len(parts) == 3: is_quantized = False else: - if parts[3] != 'quantized': + if parts[3] != 'quant': tf.logging.error( "Couldn't understand architecture suffix '%s' for '%s'", parts[3], architecture) return None is_quantized = True + data_url = 'http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/' + model_name = 'mobilenet_v1_' + version_string + '_' + size_string if is_quantized: - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_quantized_frozen.tgz' 
- bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'Placeholder:0' - model_dir_name = ('mobilenet_v1_' + version_string + '_' + size_string + - '_quantized_frozen') - model_base_name = 'quantized_frozen_graph.pb' - - else: - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_frozen.tgz' - bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'input:0' - model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string - model_base_name = 'frozen_graph.pb' + model_name += '_quant' + data_url += model_name + '.tgz' + bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + resized_input_tensor_name = 'input:0' + model_file_name = model_name + '_frozen.pb' bottleneck_tensor_size = 1001 input_width = int(size_string) input_height = int(size_string) input_depth = 3 - model_file_name = os.path.join(model_dir_name, model_base_name) input_mean = 127.5 input_std = 127.5 else: @@ -1011,43 +1077,45 @@ def add_jpeg_decoding(input_width, input_height, input_depth, input_mean, return jpeg_data, mul_image -def export_model(sess, architecture, saved_model_dir): +def export_model(model_info, class_count, saved_model_dir): """Exports model for serving. Args: - sess: Current active TensorFlow Session. - architecture: Model architecture. + model_info: The modelinfo for the current model. + class_count: The number of classes. saved_model_dir: Directory in which to save exported model and variables. 
""" - if architecture == 'inception_v3': - input_tensor = 'DecodeJpeg/contents:0' - elif architecture.startswith('mobilenet_'): - input_tensor = 'input:0' - else: - raise ValueError('Unknown architecture', architecture) - in_image = sess.graph.get_tensor_by_name(input_tensor) - inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} - - out_classes = sess.graph.get_tensor_by_name('final_result:0') - outputs = {'prediction': tf.saved_model.utils.build_tensor_info(out_classes)} + # The SavedModel should hold the eval graph. + sess, _, _, _, _ = build_eval_session(model_info, class_count) + graph = sess.graph + with graph.as_default(): + input_tensor = model_info['resized_input_tensor_name'] + in_image = sess.graph.get_tensor_by_name(input_tensor) + inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} + + out_classes = sess.graph.get_tensor_by_name('final_result:0') + outputs = { + 'prediction': tf.saved_model.utils.build_tensor_info(out_classes) + } - signature = tf.saved_model.signature_def_utils.build_signature_def( - inputs=inputs, - outputs=outputs, - method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) + signature = tf.saved_model.signature_def_utils.build_signature_def( + inputs=inputs, + outputs=outputs, + method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) - legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') + legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') - # Save out the SavedModel. - builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) - builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - signature - }, - legacy_init_op=legacy_init_op) - builder.save() + # Save out the SavedModel. 
+ builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) + builder.add_meta_graph_and_variables( + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.saved_model.signature_constants. + DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature + }, + legacy_init_op=legacy_init_op) + builder.save() def main(_): @@ -1064,11 +1132,6 @@ def main(_): tf.logging.error('Did not recognize architecture flag') return -1 - # Set up the pre-trained graph. - maybe_download_and_extract(model_info['data_url']) - graph, bottleneck_tensor, resized_image_tensor = ( - create_model_graph(model_info)) - # Look at the folder structure, and create lists of all the images. image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage, FLAGS.validation_percentage) @@ -1087,6 +1150,19 @@ def main(_): FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale, FLAGS.random_brightness) + # Set up the pre-trained graph. + maybe_download_and_extract(model_info['data_url']) + graph, bottleneck_tensor, resized_image_tensor = ( + create_model_graph(model_info)) + + # Add the new layer that we'll be training. + with graph.as_default(): + (train_step, cross_entropy, bottleneck_input, + ground_truth_input, final_tensor) = add_final_retrain_ops( + class_count, FLAGS.final_tensor_name, bottleneck_tensor, + model_info['bottleneck_tensor_size'], model_info['quantize_layer'], + True) + with tf.Session(graph=graph) as sess: # Set up the image decoding sub-graph. jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding( @@ -1110,15 +1186,8 @@ def main(_): decoded_image_tensor, resized_image_tensor, bottleneck_tensor, FLAGS.architecture) - # Add the new layer that we'll be training. 
- (train_step, cross_entropy, bottleneck_input, ground_truth_input, - final_tensor) = add_final_training_ops( - len(image_lists.keys()), FLAGS.final_tensor_name, bottleneck_tensor, - model_info['bottleneck_tensor_size'], model_info['quantize_layer']) - # Create the operations we need to evaluate the accuracy of our new layer. - evaluation_step, prediction = add_evaluation_step( - final_tensor, ground_truth_input) + evaluation_step, _ = add_evaluation_step(final_tensor, ground_truth_input) # Merge all the summaries and write them out to the summaries_dir merged = tf.summary.merge_all() @@ -1128,6 +1197,10 @@ def main(_): validation_writer = tf.summary.FileWriter( FLAGS.summaries_dir + '/validation') + # Create a train saver that is used to restore values into an eval graph + # when exporting models. + train_saver = tf.train.Saver() + # Set up all our weights to their initial default values. init = tf.global_variables_initializer() sess.run(init) @@ -1168,6 +1241,9 @@ def main(_): (datetime.now(), i, train_accuracy * 100)) tf.logging.info('%s: Step %d: Cross entropy = %f' % (datetime.now(), i, cross_entropy_value)) + # TODO(suharshs): Make this use an eval graph, to avoid quantization + # moving averages being updated by the validation set, though in + # practice this makes a negligible difference. validation_bottlenecks, validation_ground_truth, _ = ( get_random_cached_bottlenecks( sess, image_lists, FLAGS.validation_batch_size, 'validation', @@ -1190,42 +1266,32 @@ def main(_): if (intermediate_frequency > 0 and (i % intermediate_frequency == 0) and i > 0): + # If we want to do an intermediate save, save a checkpoint of the train + # graph, to restore into the eval graph. 
+ train_saver.save(sess, CHECKPOINT_NAME) intermediate_file_name = (FLAGS.intermediate_output_graphs_dir + 'intermediate_' + str(i) + '.pb') tf.logging.info('Save intermediate result to : ' + intermediate_file_name) - save_graph_to_file(sess, graph, intermediate_file_name) + save_graph_to_file(graph, intermediate_file_name, model_info, + class_count) + + # After training is complete, force one last save of the train checkpoint. + train_saver.save(sess, CHECKPOINT_NAME) # We've completed all our training, so run a final test evaluation on # some new images we haven't used before. - test_bottlenecks, test_ground_truth, test_filenames = ( - get_random_cached_bottlenecks( - sess, image_lists, FLAGS.test_batch_size, 'testing', - FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, bottleneck_tensor, - FLAGS.architecture)) - test_accuracy, predictions = sess.run( - [evaluation_step, prediction], - feed_dict={bottleneck_input: test_bottlenecks, - ground_truth_input: test_ground_truth}) - tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % - (test_accuracy * 100, len(test_bottlenecks))) - - if FLAGS.print_misclassified_test_images: - tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') - for i, test_filename in enumerate(test_filenames): - if predictions[i] != test_ground_truth[i]: - tf.logging.info('%70s %s' % - (test_filename, - list(image_lists.keys())[predictions[i]])) + run_final_eval(sess, model_info, class_count, image_lists, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor) # Write out the trained graph and labels with the weights stored as # constants. 
- save_graph_to_file(sess, graph, FLAGS.output_graph) + save_graph_to_file(graph, FLAGS.output_graph, model_info, class_count) with gfile.FastGFile(FLAGS.output_labels, 'w') as f: f.write('\n'.join(image_lists.keys()) + '\n') - export_model(sess, FLAGS.architecture, FLAGS.saved_model_dir) + export_model(model_info, class_count, FLAGS.saved_model_dir) if __name__ == '__main__': @@ -1406,8 +1472,9 @@ if __name__ == '__main__': form 'mobilenet__[_quantized]'. For example, 'mobilenet_1.0_224' will pick a model that is 17 MB in size and takes 224 pixel input images, while 'mobilenet_0.25_128_quantized' will choose a much - less accurate, but smaller and faster network that's 920 KB on disk and - takes 128x128 images. See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html + smaller and less accurate model, taking 128x128 images, and instrumented + for eventual quantization via TensorFlow Lite. + See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html for more information on Mobilenet.\ """) parser.add_argument( diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py index 8b8dd45fd7..fb7324c58a 100644 --- a/tensorflow/examples/image_retraining/retrain_test.py +++ b/tensorflow/examples/image_retraining/retrain_test.py @@ -67,22 +67,52 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): self.assertIsNotNone(sess.graph.get_tensor_by_name('DistortResult:0')) @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOps(self, flags_mock): + def testAddFinalRetrainOps(self, flags_mock): with tf.Graph().as_default(): with tf.Session() as sess: bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, False) + # Test creating final training op with quantization. 
+ retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, False, + False) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOpsQuantized(self, flags_mock): - with tf.Graph().as_default(): + def testAddFinalRetrainOpsQuantized(self, flags_mock): + # Ensure that the training and eval graph for quantized models are correctly + # created. + with tf.Graph().as_default() as g: + with tf.Session() as sess: + bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') + # Test creating final training op with quantization, set is_training to + # true. + retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, True, True) + self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) + found_fake_quant = 0 + for op in g.get_operations(): + if op.type == 'FakeQuantWithMinMaxVars': + found_fake_quant += 1 + # Ensure that the inputs of each FakeQuant operations has 2 Assign + # operations in the training graph (Assign[Min,Max]Last, + # Assign[Min,Max]Ema) + self.assertEqual(2, + len([i for i in op.inputs if 'Assign' in i.name])) + self.assertEqual(found_fake_quant, 2) + with tf.Graph().as_default() as g: with tf.Session() as sess: bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, True) + # Test creating final training op with quantization, set is_training to + # false. + retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, True, False) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) + found_fake_quant = 0 + for op in g.get_operations(): + if op.type == 'FakeQuantWithMinMaxVars': + found_fake_quant += 1 + for i in op.inputs: + # Ensure that no operations are Assign operation since this is the + # evaluation graph. 
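+ self.assertTrue('Assign' not in i.name) + self.assertEqual(found_fake_quant, 2) def testAddEvaluationStep(self): with tf.Graph().as_default(): -- GitLab From 9139a571f852d06541b0c9f2343c701ac4b7d4ff Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 18:05:59 -0800 Subject: [PATCH 117/311] Remove old implementation of the adaptive shared batcher, the in flight batches implementation delivers similar performance but is simpler and requires less tuning. PiperOrigin-RevId: 187111685 --- .../adaptive_shared_batch_scheduler.h | 172 +----- .../adaptive_shared_batch_scheduler_test.cc | 488 +++++------------- 2 files changed, 140 insertions(+), 520 deletions(-) diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h index 25c5f9cf42..661ed239d3 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h @@ -50,43 +50,26 @@ class ASBSQueue; // track of a number of queues (one per model or model version) which are // continuously enqueuing requests. The scheduler groups the requests into // batches which it periodically sends off for processing (see -// shared_batch_scheduler.h for more details). The AdaptiveSharedBatchScheduler -// prioritizes batches by age (i.e. the batch's oldest request) irrespective of -// queue or batch size. +// shared_batch_scheduler.h for more details). AdaptiveSharedBatchScheduler +// (ASBS) prioritizes batches by age (i.e. the batch's oldest request) +// irrespective of queue or batch size. // -// The scheduling decision currently exists in two flavors, controlled by the -// option use_in_flight_batches_implementation. It is expected that setting this -// option to true will give universally better results; after a period of -// testing to confirm, the old implementation will be removed. 
-// -// If use_in_flight_batches_implementation is set to true, the scheduler -// limits the number of batches which can be processed concurrently. If a new -// batch is created, and the number of in flight batches is below the limit, -// the next (i.e. oldest) batch is immediately scheduled. Similarly, when a -// batch finishes processing, the limit is rechecked, and another batch may be -// scheduled. To avoid the need to carefully tune the limit for workload, -// model type, platform, etc, it is dynamically adjusted in order to provide the -// lowest latency. -// -// If use_in_flight_batches_implementation is set to false, the scheduler will -// process the oldest batch at an adjustable rate, regardless of batch size. -// The user can provide feedback to help set this rate to achieve some goal -// (i.e. minimize overall latency, limit cpu usage, etc). The rate (or rather, -// the corresponding period) is adjusted each time a batch is processed, using -// an exponentially weighted moving average to smooth noisy feedback: -// ewma_feedback = ((N - 1) * ewma_feedback + feedback()) / N -// period *= (1 + K * emwa_feedback) +// ASBS tries to keep the system busy by maintaining an adjustable number of +// concurrently processed batches. If a new batch is created, and the number of +// in flight batches is below the target, the next (i.e. oldest) batch is +// immediately scheduled. Similarly, when a batch finishes processing, the +// target is rechecked, and another batch may be scheduled. To avoid the need +// to carefully tune the target for workload, model type, platform, etc, it is +// dynamically adjusted in order to provide the lowest average latency. // // Some potential use cases: // Hardware Accelerators (GPUs & TPUs) - If some phase of batch processing // involves serial processing by a device, from a latency perspective it is // desirable to keep the device evenly loaded, avoiding the need to wait for // the device to process prior batches. 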
-// -// If use_in_flight_batches_implementation is set to true, the scheduler -// limits the number of batches which can be processed concurrently. If a new -// batch is created, and the number of in flight batches is below the limit, -// the next (i.e. oldest) batch is immediately scheduled. Similarly, when a -// batch finishes processing, the limit is rechecked, and another batch may be -// scheduled. To avoid the need to carefully tune the limit for workload, -// model type, platform, etc, it is dynamically adjusted in order to provide the -// lowest latency. -// -// If use_in_flight_batches_implementation is set to false, the scheduler will -// process the oldest batch at an adjustable rate, regardless of batch size. -// The user can provide feedback to help set this rate to achieve some goal -// (i.e. minimize overall latency, limit cpu usage, etc). The rate (or rather, -// the corresponding period) is adjusted each time a batch is processed, using -// an exponentially weighted moving average to smooth noisy feedback: -// ewma_feedback = ((N - 1) * ewma_feedback + feedback()) / N -// period *= (1 + K * emwa_feedback) +// ASBS tries to keep the system busy by maintaining an adjustable number of +// concurrently processed batches. If a new batch is created, and the number of +// in flight batches is below the target, the next (i.e. oldest) batch is +// immediately scheduled. Similarly, when a batch finishes processing, the +// target is rechecked, and another batch may be scheduled. To avoid the need +// to carefully tune the target for workload, model type, platform, etc, it is +// dynamically adjusted in order to provide the lowest average latency. // // Some potential use cases: // Hardware Accelerators (GPUs & TPUs) - If some phase of batch processing // involves serial processing by a device, from a latency perspective it is // desirable to keep the device evenly loaded, avoiding the need to wait for // the device to process prior batches. 
-// feedback = num_pending_on_device() - desired_pending. // CPU utilization - If the batch processing is cpu dominated, you can reap // latency gains when underutilized by increasing the processing rate, but // back the rate off when the load increases to avoid overload. -// feedback = cpu_rate() - desired_cpu_rate. template class AdaptiveSharedBatchScheduler @@ -101,13 +84,17 @@ class AdaptiveSharedBatchScheduler struct Options { // The name to use for the pool of batch threads. string thread_pool_name = {"batch_threads"}; - // Number of batch processing threads; equivalently the maximum number of - // concurrently running batches. + // Number of batch processing threads - the maximum value of + // in_flight_batches_limit_. It is recommended that this value be set by + // running the system under load, observing the learned value for + // in_flight_batches_limit_, and setting this maximum to ~ 2x the value. + // Under low load, in_flight_batches_limit_ has no substantial effect on + // latency and therefore undergoes a random walk. Unreasonably large values + // for num_batch_threads allows for large in_flight_batches_limit_, which + // will harm latency for some time once load increases again. int64 num_batch_threads = port::NumSchedulableCPUs(); // The environment to use (typically only overridden by test code). Env* env = Env::Default(); - // Which implementation to use (described in class comments above). - bool use_in_flight_batches_implementation = false; // Initial limit for number of batches being concurrently processed. // Non-integer values correspond to probabilistic limits - i.e. a value of // 3.2 results in an actual cap of 3 80% of the time, and 4 20% of the time. @@ -116,28 +103,6 @@ class AdaptiveSharedBatchScheduler // numbers will give less noisy latency measurements, but will be less // responsive to changes in workload. 
int64 batches_to_average_over = 1000; - - // TODO(kte): remove the rate based implementation and corresponding options - // below once testing confirms the superiority of the in flight batches - // implementation. - // Initial batch scheduling period in microseconds. Will be altered for - // non-zero rate_feedback. - double initial_scheduling_period_micros = 500; - // Minimum batch scheduling period in microseconds. Recommend setting this - // value greater than 0, otherwise it may take a while to recover from a - // sustained time of negative scheduling_period_feedback (which may occur - // under low load). - double min_scheduling_period_micros = 100; - // Maximum batch scheduling period in microseconds. - double max_scheduling_period_micros = 10000; - // Feedback function used to modify the scheduling period each time a batch - // is scheduled. Should return values roughly O(1), with positive values - // resulting in an increased period. - std::function scheduling_period_feedback{[] { return 0.; }}; - // To handle potentially noisy scheduling_period_feedback, the period is - // adjusted using an exponentially weighted moving average over the previous - // feedback_smoothing_batches batches. Must be greater than 0. - int64 feedback_smoothing_batches = 10; }; // Ownership is shared between the caller of Create() and any queues created @@ -171,17 +136,11 @@ class AdaptiveSharedBatchScheduler explicit AdaptiveSharedBatchScheduler(const Options& options); - // Batch scheduling function which runs every scheduling_period_ microseconds. - // Only used when options_.use_in_flight_batches_implementation == false. - void ProcessOneBatch(); - // Tracks processing latency and adjusts in_flight_batches_limit to minimize. - // Only used when options_.use_in_flight_batches_implementation == true. void CallbackWrapper(const internal::ASBSBatch* batch, BatchProcessor callback); // Schedules batch if in_flight_batches_limit_ is not met. 
- // Only used when options_.use_in_flight_batches_implementation == true. void MaybeScheduleNextBatch() EXCLUSIVE_LOCKS_REQUIRED(mu_); // Notifies scheduler of non-empty batch which is eligible for processing. @@ -212,41 +171,22 @@ class AdaptiveSharedBatchScheduler mutex mu_; - // Responsible for running ProcessOneBatch. PeriodicFunction was used in order - // to check for deletion so that the thread can be shut down. - // Only used when options_.use_in_flight_batches_implementation == false. - std::unique_ptr scheduling_thread_; - // Responsible for running the batch processing callbacks. std::unique_ptr batch_thread_pool_; - // Time interval in microseconds between successive ProcessOneBatch calls. - // Only used when options_.use_in_flight_batches_implementation == false. - double scheduling_period_; - - // Exponentially weighted moving average of - // options_.scheduling_period_feedback() evaluated in each ProcessOneBatch - // call. - // Only used when options_.use_in_flight_batches_implementation == false. - double ewma_feedback_ = 0; - // Limit on number of batches which can be concurrently processed. // Non-integer values correspond to probabilistic limits - i.e. a value of 3.2 // results in an actual cap of 3 80% of the time, and 4 20% of the time. - // Only used when options_.use_in_flight_batches_implementation == true. double in_flight_batches_limit_ GUARDED_BY(mu_); // Number of batches currently being processed. - // Only used when options_.use_in_flight_batches_implementation == true. int64 in_flight_batches_ GUARDED_BY(mu_) = 0; // RNG engine and distribution. - // Only used when options_.use_in_flight_batches_implementation == true. std::default_random_engine rand_engine_; std::uniform_real_distribution rand_double_; // Fields controlling the dynamic adjustment of in_flight_batches_limit_. - // Only used when options_.use_in_flight_batches_implementation == true. // Number of batches since the last in_flight_batches_limit_ adjustment. 
int64 batch_count_ GUARDED_BY(mu_) = 0; // Sum of processing latency for batches counted by batch_count_. @@ -348,32 +288,6 @@ Status AdaptiveSharedBatchScheduler::Create( return errors::InvalidArgument("num_batch_threads must be positive; was ", options.num_batch_threads); } - if (options.min_scheduling_period_micros < 0) { - return errors::InvalidArgument( - "min_scheduling_period_micros must be >= 0; was ", - options.min_scheduling_period_micros); - } - if (options.min_scheduling_period_micros > - options.initial_scheduling_period_micros) { - return errors::InvalidArgument( - "initial_scheduling_period_micros (", - options.initial_scheduling_period_micros, - ") must be >= min_scheduling_period_micros (", - options.min_scheduling_period_micros, ")"); - } - if (options.initial_scheduling_period_micros > - options.max_scheduling_period_micros) { - return errors::InvalidArgument( - "initial_scheduling_period_micros (", - options.initial_scheduling_period_micros, - ") must be <= max_scheduling_period_micros (", - options.max_scheduling_period_micros, ")"); - } - if (options.feedback_smoothing_batches < 1) { - return errors::InvalidArgument( - "feedback_smoothing_batches must be positive; was ", - options.feedback_smoothing_batches); - } if (options.initial_in_flight_batches_limit > options.num_batch_threads) { return errors::InvalidArgument( "initial_in_flight_batches_limit (", @@ -401,20 +315,12 @@ template AdaptiveSharedBatchScheduler::AdaptiveSharedBatchScheduler( const Options& options) : options_(options), - scheduling_period_(options.initial_scheduling_period_micros), in_flight_batches_limit_(options.initial_in_flight_batches_limit), rand_double_(0.0, 1.0) { std::random_device device; rand_engine_.seed(device()); - PeriodicFunction::Options opts; - opts.thread_name_prefix = "scheduling_thread"; - opts.env = GetEnv(); batch_thread_pool_.reset(new thread::ThreadPool( GetEnv(), options.thread_pool_name, options.num_batch_threads)); - if 
(!options.use_in_flight_batches_implementation) { - scheduling_thread_.reset( - new PeriodicFunction([this] { ProcessOneBatch(); }, 0, opts)); - } } template @@ -443,9 +349,7 @@ void AdaptiveSharedBatchScheduler::AddBatch( const internal::ASBSBatch* batch) { mutex_lock l(mu_); batches_.push(batch); - if (options_.use_in_flight_batches_implementation) { - MaybeScheduleNextBatch(); - } + MaybeScheduleNextBatch(); } template @@ -523,44 +427,6 @@ void AdaptiveSharedBatchScheduler::CallbackWrapper( MaybeScheduleNextBatch(); } -template -void AdaptiveSharedBatchScheduler::ProcessOneBatch() { - static const double kFeedbackMultiplier = .001; - const internal::ASBSBatch* batch = nullptr; - BatchProcessor callback; - const int64 start_time_micros = GetEnv()->NowMicros(); - { - mutex_lock l(mu_); - if (!batches_.empty()) { - batch = batches_.top(); - batches_.pop(); - callback = queues_and_callbacks_[batch->queue()]; - } - } - if (batch != nullptr) { - double feedback = options_.scheduling_period_feedback(); - const int64 N = options_.feedback_smoothing_batches; - ewma_feedback_ = ((N - 1) * ewma_feedback_ + feedback) / N; - scheduling_period_ *= (1 + kFeedbackMultiplier * ewma_feedback_); - if (scheduling_period_ < options_.min_scheduling_period_micros) { - scheduling_period_ = options_.min_scheduling_period_micros; - } else if (scheduling_period_ > options_.max_scheduling_period_micros) { - scheduling_period_ = options_.max_scheduling_period_micros; - } - // Queue may destroy itself after ReleaseBatch is called. 
- batch->queue()->ReleaseBatch(batch); - batch_thread_pool_->Schedule([callback, batch] { - callback(std::unique_ptr>( - const_cast*>(batch))); - }); - } - const int64 sleep_time = - scheduling_period_ - (GetEnv()->NowMicros() - start_time_micros); - if (sleep_time > 0) { - GetEnv()->SleepForMicroseconds(sleep_time); - } -} - template bool AdaptiveSharedBatchScheduler::BatchCompare::operator()( const internal::ASBSBatch* a, diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc index 8ae8ca02ec..109234287e 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc @@ -64,59 +64,6 @@ std::unique_ptr CreateFakeClockAdvancerThread( })); } -TEST(AdaptiveSharedBatchSchedulerTest, Basic) { - for (const bool delete_scheduler_early : {false, true}) { - for (const bool delete_queue_1_early : {false, true}) { - int queue_0_tasks = 0; - auto queue_0_callback = - [&queue_0_tasks](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_0_tasks += batch->task(i).size(); - } - }; - int queue_1_tasks = 0; - auto queue_1_callback = - [&queue_1_tasks](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_1_tasks += batch->task(i).size(); - } - }; - { - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create({}, &scheduler)); - - // Create two queues. - std::unique_ptr> queue_0; - TF_ASSERT_OK(scheduler->AddQueue({}, queue_0_callback, &queue_0)); - std::unique_ptr> queue_1; - TF_ASSERT_OK(scheduler->AddQueue({}, queue_1_callback, &queue_1)); - - if (delete_scheduler_early) { - // Delete our copy of the scheduler. 
The queues should keep it alive - // under the covers. - scheduler = nullptr; - } - // Submit tasks to the two queues, and (optionally) remove the queues. - TF_ASSERT_OK(ScheduleTask(1, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(2, queue_1.get())); - TF_ASSERT_OK(ScheduleTask(3, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(4, queue_1.get())); - if (delete_queue_1_early) { - queue_1 = nullptr; - } - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - } - EXPECT_EQ(queue_0_tasks, 9); - EXPECT_EQ(queue_1_tasks, 6); - } - } -} - TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { using Scheduler = AdaptiveSharedBatchScheduler; std::shared_ptr scheduler; @@ -124,24 +71,6 @@ TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { options.num_batch_threads = 0; EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); options = Scheduler::Options(); - options.min_scheduling_period_micros = 50; - options.max_scheduling_period_micros = 100; - options.initial_scheduling_period_micros = 1; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.min_scheduling_period_micros = 50; - options.max_scheduling_period_micros = 100; - options.initial_scheduling_period_micros = 1000; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.min_scheduling_period_micros = 100; - options.max_scheduling_period_micros = 50; - options.initial_scheduling_period_micros = 75; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.feedback_smoothing_batches = 0; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); options.initial_in_flight_batches_limit = 0.5; EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); options = Scheduler::Options(); @@ -153,301 +82,8 @@ TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); } 
-TEST(AdaptiveSharedBatchSchedulerTest, ObeysQueueOptions) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue_0; - std::unique_ptr> queue_1; - int queue_0_tasks = 0; - int queue_1_tasks = 0; - auto queue_0_callback = [&queue_0_tasks, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_0_tasks += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - auto queue_1_callback = [&queue_1_tasks, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_1_tasks += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - AdaptiveSharedBatchScheduler::QueueOptions queue_options; - queue_options.max_batch_size = 10; - queue_options.max_enqueued_batches = 0; - // Queue must have max_enqueued_batchs > 1. - EXPECT_FALSE( - scheduler->AddQueue(queue_options, queue_0_callback, &queue_0).ok()); - queue_options.max_enqueued_batches = 2; - TF_ASSERT_OK( - scheduler->AddQueue(queue_options, queue_0_callback, &queue_0)); - EXPECT_EQ(10, queue_0->max_task_size()); - queue_options.max_batch_size = 0; - // Queue must have max_batch_size > 0. - EXPECT_FALSE( - scheduler->AddQueue(queue_options, queue_1_callback, &queue_1).ok()); - queue_options.max_batch_size = 2; - queue_options.max_enqueued_batches = 1; - TF_ASSERT_OK( - scheduler->AddQueue(queue_options, queue_1_callback, &queue_1)); - - // Wait for scheduling_thread to sleep. 
- env.BlockUntilThreadsAsleep(1); - // Task larger than max_batch_size shouldn't schedule. - EXPECT_FALSE(ScheduleTask(15, queue_0.get()).ok()); - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - env.AdvanceByMicroseconds(1); - - // Task larger than max_batch_size shouldn't schedule. - EXPECT_FALSE(ScheduleTask(3, queue_1.get()).ok()); - TF_ASSERT_OK(ScheduleTask(1, queue_1.get())); - TF_ASSERT_OK(ScheduleTask(1, queue_1.get())); - env.AdvanceByMicroseconds(1); - // Exceeds max_enqueued_batches, shouldn't schedule. - EXPECT_FALSE(ScheduleTask(1, queue_1.get()).ok()); - - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - // Exceeds max_enqueued_batches, shouldn't schedule. - EXPECT_FALSE(ScheduleTask(6, queue_0.get()).ok()); - TF_ASSERT_OK(ScheduleTask(4, queue_0.get())); - - // Batches should be processed in order from oldest to newest. - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 10); - EXPECT_EQ(queue_1_tasks, 0); - - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 10); - EXPECT_EQ(queue_1_tasks, 2); - - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 19); - EXPECT_EQ(queue_1_tasks, 2); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, RateFeedback) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - double feedback = 0; - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.min_scheduling_period_micros = 200; - options.max_scheduling_period_micros = 2000; - options.env = &env; - options.scheduling_period_feedback = [&feedback] { return feedback; }; - options.feedback_smoothing_batches = 1; - std::shared_ptr> scheduler; 
- TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - - TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 6 batches. - for (int i = 0; i < 6; i++) { - TF_ASSERT_OK(ScheduleTask(900 + i, queue.get())); - env.AdvanceByMicroseconds(1); - } - feedback = -500; - env.AdvanceByMicroseconds(994); - env.BlockUntilThreadsAsleep(2); // scheduling period = 500 usec. - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(500); - env.BlockUntilThreadsAsleep(2); // scheduling period = 250 usec. - EXPECT_EQ(scheduled_items, 901); - feedback = 0; - env.AdvanceByMicroseconds(250); - env.BlockUntilThreadsAsleep(2); // scheduling period = 250 usec. - EXPECT_EQ(scheduled_items, 902); - feedback = 10000; // large feedback should hit max_scheduling_period. - env.AdvanceByMicroseconds(250); - env.BlockUntilThreadsAsleep(2); // scheduling period = 2000 usec. - EXPECT_EQ(scheduled_items, 903); - feedback = -10000; // large feedback should hit min_scheduling_period. - env.AdvanceByMicroseconds(1999); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 903); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); // scheduling period = 200 usec. 
- EXPECT_EQ(scheduled_items, 904); - env.AdvanceByMicroseconds(200); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 905); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, FeedbackSmoothing) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - double feedback = 0; - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - options.scheduling_period_feedback = [&feedback] { return feedback; }; - options.feedback_smoothing_batches = 3; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - - TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 4 batches. - for (int i = 0; i < 4; i++) { - TF_ASSERT_OK(ScheduleTask(900 + i, queue.get())); - env.AdvanceByMicroseconds(1); - } - feedback = -300; - env.AdvanceByMicroseconds(996); - env.BlockUntilThreadsAsleep(2); - // ewma_feedback = 100, scheduling_period = 900. - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(899); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - // ewma_feedback = 167, scheduling_period = 750. 
- EXPECT_EQ(scheduled_items, 901); - env.AdvanceByMicroseconds(749); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 901); - feedback = 1000 / 3.; - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - // emwa_feedback = 0, scheduling_period = 750. - EXPECT_EQ(scheduled_items, 902); - env.AdvanceByMicroseconds(749); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 902); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 903); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, QueueCapacityInfo) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - AdaptiveSharedBatchScheduler::QueueOptions queue_options; - queue_options.max_batch_size = 10; - queue_options.max_enqueued_batches = 10; - TF_ASSERT_OK(scheduler->AddQueue(queue_options, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 3 tasks. 
- EXPECT_EQ(queue->NumEnqueuedTasks(), 0); - EXPECT_EQ(queue->SchedulingCapacity(), 100); - TF_ASSERT_OK(ScheduleTask(5, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 1); - EXPECT_EQ(queue->SchedulingCapacity(), 95); - env.AdvanceByMicroseconds(1); - TF_ASSERT_OK(ScheduleTask(6, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 2); - EXPECT_EQ(queue->SchedulingCapacity(), 84); - env.AdvanceByMicroseconds(1); - TF_ASSERT_OK(ScheduleTask(1, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 3); - EXPECT_EQ(queue->SchedulingCapacity(), 83); - - env.AdvanceByMicroseconds(998); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 5); - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 7); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesImplementation) { +TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimit) { AdaptiveSharedBatchScheduler::Options options; - options.use_in_flight_batches_implementation = true; options.initial_in_flight_batches_limit = 2; options.batches_to_average_over = 1000; mutex mu; @@ -476,7 +112,7 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesImplementation) { std::unique_ptr> queue; TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - // Enqueue 3 batches. + // Enqueue 3 tasks, should result in 3 batches. 
for (int i = 0; i < 3; i++) { TF_ASSERT_OK(ScheduleTask(100, queue.get())); } @@ -490,7 +126,6 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) { { AdaptiveSharedBatchScheduler::Options options; options.env = &env; - options.use_in_flight_batches_implementation = true; options.initial_in_flight_batches_limit = 2; options.batches_to_average_over = 1; auto queue_callback = [&env](std::unique_ptr> batch) { @@ -544,6 +179,125 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) { } stop_teardown.Notify(); } + +TEST(AdaptiveSharedBatchSchedulerTest, DeleteQueue) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + finish_processing.WaitForNotification(); + mu.lock(); + processed_batches++; + mu.unlock(); + }; + + std::unique_ptr queue_deleter; + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. + for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // Delete queue, should be kept alive until empty. + queue_deleter.reset(Env::Default()->StartThread( + {}, "QueueDeleterThread", [&queue, &mu, &processed_batches] { + queue.reset(); + mutex_lock l(mu); + EXPECT_EQ(processed_batches, 2); + })); + // Give queue_deleter thread time to delete queue. 
+ Env::Default()->SleepForMicroseconds(1000); + finish_processing.Notify(); +} + +TEST(AdaptiveSharedBatchSchedulerTest, DeleteScheduler) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + finish_processing.WaitForNotification(); + mu.lock(); + processed_batches++; + mu.unlock(); + }; + + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. + for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // Delete scheduler, should be kept alive until queues are empty. + scheduler.reset(); + finish_processing.Notify(); + while (true) { + mutex_lock l(mu); + if (processed_batches == 2) break; + } +} + +TEST(AdaptiveSharedBatchSchedulerTest, QueueCapacityInfo) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + mu.lock(); + int batch_num = ++processed_batches; + mu.unlock(); + if (batch_num == 1) { + finish_processing.WaitForNotification(); + } + }; + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. 
+ for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // First batch was immediately processed, no longer counts as enqueued. + EXPECT_EQ(queue->NumEnqueuedTasks(), 1); + EXPECT_EQ(queue->SchedulingCapacity(), 9 * 1000 + 900); + // Enqueue 2 more tasks, should fall in same batch. + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + TF_ASSERT_OK(ScheduleTask(200, queue.get())); + EXPECT_EQ(queue->NumEnqueuedTasks(), 3); + EXPECT_EQ(queue->SchedulingCapacity(), 9 * 1000 + 600); + // Enqueue 1 more task, should create new batch. + TF_ASSERT_OK(ScheduleTask(700, queue.get())); + EXPECT_EQ(queue->NumEnqueuedTasks(), 4); + EXPECT_EQ(queue->SchedulingCapacity(), 8 * 1000 + 300); + finish_processing.Notify(); +} } // namespace anonymous } // namespace serving } // namespace tensorflow -- GitLab From 9ba9cf259b38af8425f4ee3b8967b811575fd149 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 19:46:27 -0800 Subject: [PATCH 118/311] Make sure rounding and handling of denormals in Grappler is the same as in TensorFlow. Enable constant folding for more types, particularly on GPUs. 
PiperOrigin-RevId: 187120456 --- tensorflow/core/grappler/op_types.cc | 6 +- .../grappler/optimizers/constant_folding.cc | 96 ++++++++++++------- tensorflow/core/kernels/constant_op.cc | 11 +++ 3 files changed, 74 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index e225e99a9e..9b3755ddce 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -354,7 +354,8 @@ bool IsFreeOfSideEffect(const NodeDef& node) { return false; } const OpDef* op_def = nullptr; - Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); + const string& op_name = node.op(); + Status status = OpRegistry::Global()->LookUpOpDef(op_name, &op_def); if (!status.ok()) { return false; } @@ -368,7 +369,8 @@ bool IsFreeOfSideEffect(const NodeDef& node) { } } // Some nodes do in-place updates on regular tensor inputs. - if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace")) { + if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace") || + StringPiece(op_name).starts_with("Inplace")) { return false; } return true; diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 10ca7dcce0..a5417aaa51 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -35,7 +35,9 @@ limitations under the License. 
#include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/setround.h" #include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/bcast.h" @@ -51,7 +53,14 @@ class EigenThreadPoolWrapper : public Eigen::ThreadPoolInterface { explicit EigenThreadPoolWrapper(thread::ThreadPool* pool) : pool_(pool) {} ~EigenThreadPoolWrapper() override {} void Schedule(std::function fn) override { - pool_->Schedule(std::move(fn)); + auto wrapped = [=]() { + // TensorFlow flushes denormals to zero and rounds to nearest, so we do + // the same here. + port::ScopedFlushDenormal flush; + port::ScopedSetRound round(FE_TONEAREST); + fn(); + }; + pool_->Schedule(std::move(wrapped)); } int NumThreads() const override { return pool_->NumThreads(); } int CurrentThreadId() const override { return pool_->CurrentThreadId(); } @@ -292,16 +301,16 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // graph. const int node_count = graph_->node_size(); for (int i = 0; i < node_count; ++i) { - NodeDef& node = *graph_->mutable_node(i); - const string op = node.op(); + NodeDef* node = graph_->mutable_node(i); + const string op = node->op(); if (op != "Shape" && op != "Size" && op != "Rank" && op != "ShapeN") { continue; } const std::vector& output = - properties.GetOutputProperties(node.name()); + properties.GetOutputProperties(node->name()); const std::vector& input = - properties.GetInputProperties(node.name()); + properties.GetInputProperties(node->name()); if (input.empty() || output.empty()) { continue; } @@ -328,35 +337,35 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // could have multiple outputs). 
if (op == "Shape" || op == "Size" || op == "Rank") { // Replace the node with the corresponding constant. - node.set_op("Const"); - node.clear_attr(); - (*node.mutable_attr())["dtype"].set_type(type); + node->set_op("Const"); + node->clear_attr(); + (*node->mutable_attr())["dtype"].set_type(type); value.AsProtoTensorContent( - (*node.mutable_attr())["value"].mutable_tensor()); + (*node->mutable_attr())["value"].mutable_tensor()); // Turn the data input into a control dependency: this is needed to // ensure that the constant value will only be run in the // cases where the shape/rank/size would have been run in // the original graph. Additional inputs are extra control string ctrl_dep = - AddControlDependency(node.input(0), graph_, node_map_.get()); - node.set_input(0, ctrl_dep); - node_map_->AddOutput(NodeName(ctrl_dep), node.name()); + AddControlDependency(node->input(0), graph_, node_map_.get()); + node->set_input(0, ctrl_dep); + node_map_->AddOutput(NodeName(ctrl_dep), node->name()); } else { - auto outputs = node_map_->GetOutputs(node.name()); + auto outputs = node_map_->GetOutputs(node->name()); for (const auto& output : outputs) { for (int k = 0; k < output->input_size(); ++k) { int port; string node_name = ParseNodeName(output->input(k), &port); - if (node_name == node.name() && port == j) { + if (node_name == node->name() && port == j) { // Create a const node as ShapeN's output if not already. 
const string const_name = - OptimizedNodeName(node, strings::StrCat("-matshapes-", j)); + OptimizedNodeName(*node, strings::StrCat("-matshapes-", j)); if (node_map_->GetNode(const_name) == nullptr) { NodeDef* added_node = graph_->add_node(); added_node->set_name(const_name); added_node->set_op("Const"); - added_node->set_device(node.device()); + added_node->set_device(node->device()); node_map_->AddNode(added_node->name(), added_node); (*added_node->mutable_attr())["dtype"].set_type(type); value.AsProtoTensorContent( @@ -364,7 +373,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // We add a control dependency to the original ShapeN node, // so that the node will only be run if all inputs of the // original ShapeN node are run. - string ctrl_dep = AddControlDependency(node.name(), graph_, + string ctrl_dep = AddControlDependency(node->name(), graph_, node_map_.get()); *added_node->add_input() = ctrl_dep; node_map_->AddOutput(NodeName(ctrl_dep), added_node->name()); @@ -679,7 +688,7 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { nodes_whitelist_.find(node.name()) == nodes_whitelist_.end()) { return false; } - // Skip control flow nodes, they can't be folded + // Skip control flow nodes, they can't be folded. if (ModifiesFrameInfo(node)) { return false; } @@ -688,12 +697,16 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { return false; } - // Skips ops that don't benefit from folding. - const string& op = node.op(); + // Don't fold stateful ops such as TruncatedNormal. + if (!IsFreeOfSideEffect(node)) { + return false; + } - if (op.find("Placeholder") == 0) { + // Skips ops that don't benefit from folding. 
+ if (IsPlaceholder(node)) { return false; } + const string& op = node.op(); if (op.find("Save") != string::npos || op.find("Restore") != string::npos || op.find("Reader") != string::npos) { return false; @@ -705,16 +718,12 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { return false; } - // Don't fold stateful ops such as TruncatedNormal. const OpDef* op_def = nullptr; Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); if (!status.ok()) { return false; } - if (op_def->is_stateful()) { - return false; - } - + // Don't fold ops without outputs. if (op_def->output_arg_size() == 0) { return false; } @@ -779,8 +788,11 @@ Status CreateConstantTensorAttrValue(DataType type, double value, SET_TENSOR_VAL_CASE(DT_FLOAT, float, float); SET_TENSOR_VAL_CASE(DT_DOUBLE, double, double); SET_TENSOR_VAL_CASE(DT_INT64, int64, int64); + SET_TENSOR_VAL_CASE(DT_UINT64, int64, int64); SET_TENSOR_VAL_CASE(DT_INT32, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT32, int32, int); SET_TENSOR_VAL_CASE(DT_INT16, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT16, int32, int); SET_TENSOR_VAL_CASE(DT_INT8, int32, int); SET_TENSOR_VAL_CASE(DT_UINT8, int32, int); SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool); @@ -843,10 +855,16 @@ Status ConstantFolding::CreateNodeDef(const string& name, POPULATE_TENSOR_PROTO(tensor, t, double, double); case DT_INT64: POPULATE_TENSOR_PROTO(tensor, t, int64, int64); + case DT_UINT64: + POPULATE_TENSOR_PROTO(tensor, t, uint64, int64); case DT_INT32: POPULATE_TENSOR_PROTO(tensor, t, int32, int); + case DT_UINT32: + POPULATE_TENSOR_PROTO(tensor, t, uint32, int); case DT_INT16: POPULATE_TENSOR_PROTO(tensor, t, int16, int); + case DT_UINT16: + POPULATE_TENSOR_PROTO(tensor, t, uint16, int); case DT_INT8: POPULATE_TENSOR_PROTO(tensor, t, int8, int); case DT_UINT8: @@ -1166,9 +1184,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { std::unordered_set processed_nodes; std::deque queue; for (int i = 0; i < graph_->node_size(); i++) { - auto 
node = graph_->mutable_node(i); - if (IsFoldable(*node)) { - queue.push_back(node); + if (IsFoldable(graph_->node(i))) { + queue.push_back(graph_->mutable_node(i)); } } while (!queue.empty()) { @@ -1203,8 +1220,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { int last = output->node_size() - 1; for (int i = output->node_size() - 1; i >= 0; --i) { const NodeDef& node = output->node(i); - auto outputs = node_map_->GetOutputs(node.name()); - if (outputs.empty()) { + auto fanout = node_map_->GetOutputs(node.name()); + if (fanout.empty()) { output->mutable_node()->SwapElements(i, last); last--; } @@ -1216,8 +1233,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { // If no fetch nodes is provided, we conservatively // keep all nodes in the original graph in case users need to fetch // their values. - auto outputs = node_map_->GetOutputs(node.name()); - if (!outputs.empty() || !has_fetch_ || + auto fanout = node_map_->GetOutputs(node.name()); + if (!fanout.empty() || !has_fetch_ || nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { auto added_node = output->add_node(); *added_node = node; @@ -1331,14 +1348,14 @@ bool ConstantFolding::IsOnes(const NodeDef& node) const { // IS_ONES_CASE(DT_HALF); IS_ONES_CASE(DT_FLOAT); IS_ONES_CASE(DT_DOUBLE); + IS_ONES_CASE(DT_COMPLEX64); + IS_ONES_CASE(DT_COMPLEX128); IS_ONES_CASE(DT_UINT8); IS_ONES_CASE(DT_INT8); IS_ONES_CASE(DT_UINT16); IS_ONES_CASE(DT_INT16); IS_ONES_CASE(DT_INT32); IS_ONES_CASE(DT_INT64); - IS_ONES_CASE(DT_COMPLEX64); - IS_ONES_CASE(DT_COMPLEX128); default: VLOG(1) << "Unsupported type " << DataTypeString(dtype); return false; @@ -1362,14 +1379,14 @@ bool ConstantFolding::IsZeros(const NodeDef& node) const { // IS_ZEROS_CASE(DT_HALF); IS_ZEROS_CASE(DT_FLOAT); IS_ZEROS_CASE(DT_DOUBLE); + IS_ZEROS_CASE(DT_COMPLEX64); + IS_ZEROS_CASE(DT_COMPLEX128); IS_ZEROS_CASE(DT_UINT8); IS_ZEROS_CASE(DT_INT8); IS_ZEROS_CASE(DT_UINT16); IS_ZEROS_CASE(DT_INT16); IS_ZEROS_CASE(DT_INT32); 
IS_ZEROS_CASE(DT_INT64); - IS_ZEROS_CASE(DT_COMPLEX64); - IS_ZEROS_CASE(DT_COMPLEX128); default: VLOG(1) << "Unsupported type " << DataTypeString(dtype); return false; @@ -1869,6 +1886,11 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { + // TensorFlow flushes denormals to zero and rounds to nearest, so we do + // the same here. + port::ScopedFlushDenormal flush; + port::ScopedSetRound round(FE_TONEAREST); + nodes_to_preserve_ = item.NodesToPreserve(); for (const auto& feed : item.feed) { feed_nodes_.insert(NodeName(feed.first)); diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index fdb03a5aae..312c1a41d3 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -105,7 +105,12 @@ REGISTER_KERNEL(GPU, int8); REGISTER_KERNEL(GPU, qint8); REGISTER_KERNEL(GPU, uint16); REGISTER_KERNEL(GPU, int16); +REGISTER_KERNEL(GPU, qint16); +REGISTER_KERNEL(GPU, quint16); +REGISTER_KERNEL(GPU, uint32); +REGISTER_KERNEL(GPU, qint32); REGISTER_KERNEL(GPU, int64); +REGISTER_KERNEL(GPU, uint64); REGISTER_KERNEL(GPU, complex64); REGISTER_KERNEL(GPU, complex128); REGISTER_KERNEL(GPU, bool); @@ -122,9 +127,15 @@ REGISTER_SYCL_KERNEL(SYCL, float); REGISTER_SYCL_KERNEL(SYCL, double); REGISTER_SYCL_KERNEL(SYCL, uint8); REGISTER_SYCL_KERNEL(SYCL, int8); +REGISTER_SYCL_KERNEL(SYCL, qint8); REGISTER_SYCL_KERNEL(SYCL, uint16); REGISTER_SYCL_KERNEL(SYCL, int16); +REGISTER_SYCL_KERNEL(SYCL, qint16); +REGISTER_SYCL_KERNEL(SYCL, quint16); +REGISTER_SYCL_KERNEL(SYCL, uint32); +REGISTER_SYCL_KERNEL(SYCL, qint32); REGISTER_SYCL_KERNEL(SYCL, int64); +REGISTER_SYCL_KERNEL(SYCL, uint64); REGISTER_SYCL_KERNEL(SYCL, bool); #undef REGISTER_SYCL_KERNEL #endif -- GitLab From ccefd0a1307ac5dd39d0a254c49ce71f8c2b93e2 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 26 Feb 2018 19:57:42 -0800 Subject: 
[PATCH 119/311] Fixes and simplification in the Keras training engine. - Explicitly disallow sample/class weighting in eager (it was never supported) - Remove tests for it (which were actually ignoring sample/class weights) - Make sample weight placeholders placeholder_with_default, and do not create all-ones numpy arrays to feed them when no sample weights are provided (this might lead to better performance) PiperOrigin-RevId: 187121215 --- .../python/keras/_impl/keras/backend.py | 11 +- .../python/keras/_impl/keras/callbacks.py | 20 +- .../keras/_impl/keras/engine/training.py | 151 +++--- .../_impl/keras/engine/training_eager.py | 17 +- .../_impl/keras/engine/training_eager_test.py | 436 ------------------ .../keras/_impl/keras/engine/training_test.py | 8 - 6 files changed, 110 insertions(+), 533 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index a2db05f6cf..2b75666b9e 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -2749,7 +2749,7 @@ class Function(object): self.updates_op = control_flow_ops.group(*updates_ops) self.name = name # additional tensor substitutions - self.feed_dict = session_kwargs.pop('feed_dict', {}) + self.feed_dict = session_kwargs.pop('feed_dict', None) # additional operations self.fetches = session_kwargs.pop('fetches', []) if not isinstance(self.fetches, list): @@ -2759,8 +2759,15 @@ class Function(object): def __call__(self, inputs): if not isinstance(inputs, (list, tuple)): raise TypeError('`inputs` should be a list or tuple.') - feed_dict = self.feed_dict.copy() + + if self.feed_dict: + feed_dict = self.feed_dict.copy() + else: + feed_dict = {} + for tensor, value in zip(self.inputs, inputs): + if value is None: + continue if is_sparse(tensor): sparse_coo = value.tocoo() indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py 
b/tensorflow/python/keras/_impl/keras/callbacks.py index f6c4661425..deb1e8867d 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks.py +++ b/tensorflow/python/keras/_impl/keras/callbacks.py @@ -778,16 +778,24 @@ class TensorBoard(Callback): while i < val_size: step = min(self.batch_size, val_size - i) batch_val = [] - batch_val.append(val_data[0][i:i + step]) - batch_val.append(val_data[1][i:i + step]) - batch_val.append(val_data[2][i:i + step]) + batch_val.append(val_data[0][i:i + step] + if val_data[0] is not None else None) + batch_val.append(val_data[1][i:i + step] + if val_data[1] is not None else None) + batch_val.append(val_data[2][i:i + step] + if val_data[2] is not None else None) if self.model.uses_learning_phase: # do not slice the learning phase - batch_val = [x[i:i + step] for x in val_data[:-1]] + batch_val = [x[i:i + step] if x is not None else None + for x in val_data[:-1]] batch_val.append(val_data[-1]) else: - batch_val = [x[i:i + step] for x in val_data] - feed_dict = dict(zip(tensors, batch_val)) + batch_val = [x[i:i + step] if x is not None else None + for x in val_data] + feed_dict = {} + for key, val in zip(tensors, batch_val): + if val is not None: + feed_dict[key] = val result = self.sess.run([self.merged], feed_dict=feed_dict) summary_str = result[0] self.writer.add_summary(summary_str, epoch) diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 57451ad470..63bea08ac5 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -40,6 +40,7 @@ from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.layers.base import _DeferredTensor +from tensorflow.python.ops import array_ops from 
tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import optimizer as tf_optimizer_module from tensorflow.python.util.tf_export import tf_export @@ -225,9 +226,9 @@ def _check_array_lengths(inputs, targets, weights=None): # return a set with the variation between # different shapes, with None => 0 if x is None: - return {0} + return {} else: - return set([0 if y is None else y.shape[0] for y in x]) + return set([y.shape[0] for y in x if y is not None]) set_x = set_of_lengths(inputs) set_y = set_of_lengths(targets) @@ -259,7 +260,8 @@ def _check_array_lengths(inputs, targets, weights=None): def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): """Does validation on the compatibility of targets and loss functions. - This helps prevent users from using loss functions incorrectly. + This helps prevent users from using loss functions incorrectly. This check + is purely for UX purposes. Arguments: targets: list of Numpy arrays of targets. 
@@ -275,7 +277,7 @@ def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): losses.categorical_crossentropy } for y, loss, shape in zip(targets, loss_fns, output_shapes): - if y is None or loss is None: + if y is None or loss is None or tensor_util.is_tensor(y): continue if loss is losses.categorical_crossentropy: if y.shape[-1] == 1: @@ -507,10 +509,7 @@ def _standardize_weights(y, (existing_classes - existing_class_weight)) return weights else: - if sample_weight_mode is None: - return np.ones((y.shape[0],), dtype=K.floatx()) - else: - return np.ones((y.shape[0], y.shape[1]), dtype=K.floatx()) + return None @tf_export('keras.models.Model', 'keras.Model') @@ -862,12 +861,12 @@ class Model(Network): sample_weights.append(None) else: if sample_weight_mode == 'temporal': - sample_weights.append( - K.placeholder(ndim=2, name=name + '_sample_weights')) + sample_weights.append(array_ops.placeholder_with_default( + [[1.]], shape=[None, None], name=name + '_sample_weights')) sample_weight_modes.append('temporal') else: - sample_weights.append( - K.placeholder(ndim=1, name=name + '_sample_weights')) + sample_weights.append(array_ops.placeholder_with_default( + [1.], shape=[None], name=name + '_sample_weights')) sample_weight_modes.append(None) self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] @@ -1314,7 +1313,7 @@ class Model(Network): for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] try: - if isinstance(ins[-1], float): + if isinstance(ins[-1], int): # Do not slice the training phase flag. 
ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -1424,7 +1423,7 @@ class Model(Network): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if ins and isinstance(ins[-1], float): + if ins and isinstance(ins[-1], int): # Do not slice the training phase flag. ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -1518,7 +1517,7 @@ class Model(Network): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if isinstance(ins[-1], float): + if isinstance(ins[-1], int): # Do not slice the training phase flag. ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -2070,10 +2069,6 @@ class Model(Network): val_y, sample_weight=val_sample_weight, batch_size=batch_size) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = val_x + val_y + val_sample_weights + [0.] - else: - val_ins = val_x + val_y + val_sample_weights elif validation_split and 0. < validation_split < 1.: do_validation = True @@ -2085,36 +2080,34 @@ class Model(Network): y, val_y = (slice_arrays(y, 0, split_at), slice_arrays(y, split_at)) sample_weights, val_sample_weights = (slice_arrays( sample_weights, 0, split_at), slice_arrays(sample_weights, split_at)) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = val_x + val_y + val_sample_weights + [0.] - else: - val_ins = val_x + val_y + val_sample_weights - elif validation_steps: + val_x = [] + val_y = [] + val_sample_weights = [] do_validation = True - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = [0.] - - # Prepare input arrays and training function. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [1.] 
- else: - ins = x + y + sample_weights # Prepare display labels. out_labels = self.metrics_names if context.in_eager_mode(): + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + if do_validation: + if any([w is not None for w in val_sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported' + ' when eager execution is enabled, for now.') callback_metrics = copy.copy(out_labels) + [ 'val_' + n for n in out_labels ] + val_ins = val_x + val_y else: callback_metrics = copy.copy(out_labels) return training_eager.fit_loop( self, - ins, + x + y, out_labels=out_labels, batch_size=batch_size, epochs=epochs, @@ -2127,18 +2120,25 @@ class Model(Network): steps_per_epoch=steps_per_epoch, validation_steps=validation_steps) else: + # Prepare input arrays and training function. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [1] + else: + ins = x + y + sample_weights + self._make_train_function() f = self.train_function if do_validation: - if context.in_graph_mode(): - self._make_test_function() - val_f = self.test_function - else: - val_f = None + self._make_test_function() + val_f = self.test_function callback_metrics = copy.copy(out_labels) + [ 'val_' + n for n in out_labels ] + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + val_ins = val_x + val_y + val_sample_weights + [0] + else: + val_ins = val_x + val_y + val_sample_weights else: val_f = None callback_metrics = copy.copy(out_labels) @@ -2229,16 +2229,20 @@ class Model(Network): y, sample_weight=sample_weight, batch_size=batch_size) - # Prepare inputs, delegate logic to `_test_loop`. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [0.] 
- else: - ins = x + y + sample_weights if context.in_eager_mode(): + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') return training_eager.test_loop( - self, ins, batch_size=batch_size, verbose=verbose, steps=steps) + self, x + y, batch_size=batch_size, verbose=verbose, steps=steps) else: + # Prepare inputs, delegate logic to `_test_loop`. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [0] + else: + ins = x + y + sample_weights + self._make_test_function() f = self.test_function return self._test_loop( @@ -2276,16 +2280,16 @@ class Model(Network): 'argument.') x, _, _ = self._standardize_user_data(x) - # Prepare inputs, delegate logic to `_predict_loop`. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + [0.] - else: - ins = x - if context.in_eager_mode(): return training_eager.predict_loop( - self, ins, batch_size=batch_size, verbose=verbose, steps=steps) + self, x, batch_size=batch_size, verbose=verbose, steps=steps) else: + # Prepare inputs, delegate logic to `_predict_loop`. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + [0] + else: + ins = x + self._make_predict_function() f = self.predict_function @@ -2327,20 +2331,26 @@ class Model(Network): and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. + Raises: + ValueError: In case of invalid user-provided arguments. """ x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight, class_weight=class_weight) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [1.] 
- else: - ins = x + y + sample_weights if context.in_eager_mode(): - outputs = training_eager.train_on_batch(self, ins) + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + outputs = training_eager.train_on_batch(self, x + y) else: + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [1] + else: + ins = x + y + sample_weights + self._make_train_function() outputs = self.train_function(ins) @@ -2377,18 +2387,21 @@ class Model(Network): the display labels for the scalar outputs. Raises: - ValueError: in case of invalid arguments. + ValueError: In case of invalid user-provided arguments. """ x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [0.] - else: - ins = x + y + sample_weights if context.in_eager_mode(): - outputs = training_eager.test_on_batch(self, ins) + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + outputs = training_eager.test_on_batch(self, x + y) else: + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [0] + else: + ins = x + y + sample_weights self._make_test_function() outputs = self.test_function(ins) @@ -2408,14 +2421,9 @@ class Model(Network): """ x, _, _ = self._standardize_user_data(x) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + [0.] 
- else: - ins = x - if context.in_eager_mode(): ins_batch_converted = [] - for ib in ins: + for ib in x: ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) eager_model_inputs = [] @@ -2426,6 +2434,11 @@ class Model(Network): return outs if context.in_graph_mode(): + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + [0] + else: + ins = x + self._make_predict_function() outputs = self.predict_function(ins) if len(outputs) == 1: @@ -2643,7 +2656,7 @@ class Model(Network): val_data = val_x + val_y + val_sample_weights if self.uses_learning_phase and not isinstance( K.learning_phase(), int): - val_data += [0.] + val_data += [0] for cbk in callbacks: cbk.validation_data = val_data diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py index 282dd0dc0d..cdf189adef 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py @@ -139,6 +139,8 @@ def _model_loss(model, inputs, targets, training=False): model.output_names[i]) loss_metrics.append(K.mean(output_loss)) + # TODO(fchollet): support masking; in practice `_keras_mask` is never + # set in this context currently. mask = outs[i]._keras_mask # adapted from weighted_loss_fn if mask is not None: @@ -148,17 +150,7 @@ def _model_loss(model, inputs, targets, training=False): # to the number of unmasked samples. 
output_loss /= K.mean(mask) - # adapted from weighted_loss_fn - # apply sample weighting - if model.sample_weights: - # reduce score_array to same ndim as weight array - ndim = K.ndim(output_loss) - weight_ndim = K.ndim(model.sample_weights) - output_loss = K.mean(output_loss, axis=list(range(weight_ndim, ndim))) - output_loss *= model.sample_weights - output_loss /= K.mean(K.cast(K.not_equal(model.sample_weights, 0), - K.floatx())) - output_loss = K.mean(output_loss) + # TODO(fchollet): support sample weighting loss_weight = model.loss_weights_list[i] if total_loss is None: @@ -231,7 +223,8 @@ def train_on_batch(model, ins): """ ins_batch_converted = [] for ib in ins: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) + if ib is not None: + ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) eager_model_inputs = [] eager_model_outputs = [] for i in range(len(model.inputs)): diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py index 3d94b7537f..550b86a71d 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py @@ -24,9 +24,7 @@ import numpy as np from tensorflow.python.framework import ops from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils -from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.rmsprop import RMSPropOptimizer @@ -311,440 +309,6 @@ class TrainingTest(test.TestCase): optimizer='rms') -class LossWeightingTest(test.TestCase): - - def test_class_weights(self): - num_classes = 5 - batch_size = 5 - epochs = 5 - weighted_class = 3 - train_samples = 3000 - test_samples = 3000 - input_dim = 5 - - model = 
keras.models.Sequential() - model.add(keras.layers.Dense(10, input_shape=(input_dim,))) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(num_classes)) - model.add(keras.layers.Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_test = y_test.copy() - int_y_train = y_train.copy() - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - y_test = keras.utils.to_categorical(y_test, num_classes) - test_ids = np.where(int_y_test == np.array(weighted_class))[0] - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - class_weight=class_weight, - validation_data=(x_train, y_train, sample_weight)) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 2, - verbose=0, - class_weight=class_weight) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 2, - verbose=0, - class_weight=class_weight, - validation_split=0.1) - - model.train_on_batch( - x_train[:batch_size], y_train[:batch_size], class_weight=class_weight) - ref_score = model.evaluate(x_test, y_test, verbose=0) - score = model.evaluate( - x_test[test_ids, :], y_test[test_ids, :], verbose=0) - self.assertLess(score, ref_score) - - def test_sample_weights(self): - num_classes = 5 - batch_size = 5 - epochs = 5 - weighted_class = 3 - train_samples = 3000 - test_samples = 3000 - input_dim = 5 - - model = keras.models.Sequential() - model.add(keras.layers.Dense(10, input_shape=(input_dim,))) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(num_classes)) - model.add(keras.layers.Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - np.random.seed(43) - (x_train, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_train = y_train.copy() - y_train = keras.utils.to_categorical(y_train, num_classes) - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - sample_weight=sample_weight) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - sample_weight=sample_weight, - validation_split=0.1) - model.train_on_batch( - x_train[:batch_size], - y_train[:batch_size], - sample_weight=sample_weight[:batch_size]) - model.test_on_batch( - x_train[:batch_size], - y_train[:batch_size], - sample_weight=sample_weight[:batch_size]) - - def test_temporal_sample_weights(self): - num_classes = 5 - weighted_class = 3 - train_samples = 1000 - test_samples = 1000 - input_dim = 5 - timesteps = 3 - - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(num_classes), - input_shape=(timesteps, input_dim))) - model.add(keras.layers.Activation('softmax')) - - np.random.seed(1337) - (_, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_train = y_train.copy() - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- with self.assertRaises(ValueError): - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - sample_weight_mode='temporal') - - def test_class_weight_invalid_use_case(self): - num_classes = 5 - train_samples = 1000 - test_samples = 1000 - input_dim = 5 - timesteps = 3 - - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(num_classes), - input_shape=(timesteps, input_dim))) - model.add(keras.layers.Activation('softmax')) - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - (x_train, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - class_weight = dict([(i, 1.) for i in range(num_classes)]) - - del class_weight[1] - with self.assertRaises(ValueError): - model.fit(x_train, y_train, - epochs=0, verbose=0, class_weight=class_weight) - - with self.assertRaises(ValueError): - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - sample_weight_mode=[]) - - # Build multi-output model - x = keras.Input((3,)) - y1 = keras.layers.Dense(4, name='1')(x) - y2 = keras.layers.Dense(4, name='2')(x) - model = keras.models.Model(x, [y1, y2]) - model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001), loss='mse') - x_np = np.random.random((10, 3)) - y_np = np.random.random((10, 4)) - w_np = np.random.random((10,)) - # This will work - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': w_np}) - # These will not - with self.assertRaises(ValueError): - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=[w_np]) - with self.assertRaises(TypeError): - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=w_np) - with self.assertRaises(ValueError): - bad_w_np = 
np.random.random((11,)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - with self.assertRaises(ValueError): - bad_w_np = np.random.random((10, 2)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - with self.assertRaises(ValueError): - bad_w_np = np.random.random((10, 2, 2)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - - -class TestDynamicTrainability(test.TestCase): - - def test_trainable_warning(self): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=3)) - model.trainable = False - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - model.trainable = True - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - - def test_trainable_argument(self): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=3, trainable=False)) - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - out = model.predict(x) - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - out_2 = model.predict(x) - self.assertAllClose(out, out_2) - - # test with nesting - inputs = keras.layers.Input(shape=(3,)) - output = model(inputs) - model = keras.models.Model(inputs, output) - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - out = model.predict(x) - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - out_2 = model.predict(x) - self.assertAllClose(out, out_2) - - def test_layer_trainability_switch(self): - # with constructor argument, in 
Sequential - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, trainable=False, input_dim=1)) - self.assertListEqual(model.trainable_weights, []) - - # by setting the `trainable` argument, in Sequential - model = keras.models.Sequential() - layer = keras.layers.Dense(2, input_dim=1) - model.add(layer) - self.assertListEqual(model.trainable_weights, layer.trainable_weights) - layer.trainable = False - self.assertListEqual(model.trainable_weights, []) - - # with constructor argument, in Model - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2, trainable=False)(x) - model = keras.models.Model(x, y) - self.assertListEqual(model.trainable_weights, []) - - # by setting the `trainable` argument, in Model - x = keras.layers.Input(shape=(1,)) - layer = keras.layers.Dense(2) - y = layer(x) - model = keras.models.Model(x, y) - self.assertListEqual(model.trainable_weights, layer.trainable_weights) - layer.trainable = False - self.assertListEqual(model.trainable_weights, []) - - def test_model_trainability_switch(self): - # a non-trainable model has no trainable weights - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - model = keras.models.Model(x, y) - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - - # same for Sequential - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=1)) - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - - def test_nested_model_trainability(self): - - # a Sequential inside a Model - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(2, input_dim=1)) - - x = keras.layers.Input(shape=(1,)) - y = inner_model(x) - outer_model = keras.models.Model(x, y) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = 
False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Sequential inside a Sequential - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(2, input_dim=1)) - outer_model = keras.models.Sequential() - outer_model.add(inner_model) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Model inside a Model - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - inner_model = keras.models.Model(x, y) - x = keras.layers.Input(shape=(1,)) - y = inner_model(x) - outer_model = keras.models.Model(x, y) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Model inside a Sequential - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - inner_model = keras.models.Model(x, y) - outer_model = keras.models.Sequential() - outer_model.add(inner_model) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - -class TestTrainingUtils(test.TestCase): - - def test_check_array_lengths(self): - keras.engine.training._check_array_lengths(None, None, None) - a_np = np.random.random((4, 3, 3)) - keras.engine.training._check_array_lengths(a_np, a_np, a_np) - keras.engine.training._check_array_lengths( - [a_np, a_np], [a_np, a_np], [a_np, a_np]) - 
keras.engine.training._check_array_lengths([None], [None], [None]) - - b_np = np.random.random((3, 4)) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, None, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, a_np, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [None], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [b_np], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], None, [b_np]) - - def test_slice_arrays(self): - input_a = np.random.random((10, 3)) - slice_arrays(None) - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = [None, [1, 1], None, [1, 1]] - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = [None] - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = None - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - - def test_fit_with_BatchNorm(self): - model = keras.models.Sequential() - model.add(keras.layers.Dense(10, input_dim=4)) - model.add(keras.layers.BatchNormalization()) - model.add(keras.layers.Activation('tanh')) - model.add(keras.layers.Dropout(0.2)) - - input_a_np = np.random.random((10, 4)) - output_b_np = np.random.random((10, 10)) - - model.compile(loss='binary_crossentropy', optimizer=RMSPropOptimizer(0.001)) - model.fit(input_a_np, output_b_np, epochs=1, batch_size=5, verbose=0) - - def test_fit_with_regularization(self): - model = keras.models.Sequential() - with self.assertRaises(ValueError): - model.add( - keras.layers.Dense(4, input_dim=3, - kernel_regularizer=keras.regularizers.l2(0.01), - activity_regularizer=keras.regularizers.l1(0.01))) - - if __name__ == '__main__': # Bazel sets these environment variables to very long 
paths. # Tempfile uses them to create long paths, and in turn multiprocessing diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index 9651eb9f14..6ca5941e9a 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -1045,16 +1045,8 @@ class TestTrainingUtils(test.TestCase): keras.engine.training._check_array_lengths([None], [None], [None]) b_np = np.random.random((3, 4)) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, None, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, a_np, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [None], None) with self.assertRaises(ValueError): keras.engine.training._check_array_lengths([a_np], [b_np], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], None, [b_np]) def test_slice_arrays(self): input_a = np.random.random((10, 3)) -- GitLab From 78d10e5800a058c6d1865c5282aaa4094f7bc36d Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 19:58:18 -0800 Subject: [PATCH 120/311] Fix bug in deserializing CondContexts. 
PiperOrigin-RevId: 187121244 --- tensorflow/python/ops/control_flow_ops.py | 11 ++++- tensorflow/python/training/saver_test.py | 49 ++++++++++++++++------- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index b16901effd..0815527c96 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1716,8 +1716,15 @@ class CondContext(ControlFlowContext): self._pivot = g.as_graph_element( ops.prepend_name_scope(context_def.pivot_name, import_scope)) self._branch = context_def.branch - super(CondContext, self).__init__( - values_def=context_def.values_def, import_scope=import_scope) + super(CondContext, self).__init__(values_def=context_def.values_def, + import_scope=import_scope) + # The predicate and pivot ops appear in self._values, but don't have self + # set as their control context. The __init__ call above will set self for + # all values, so manually override the predicate and pivot contexts here. + # pylint: disable=protected-access + self._pred.op._set_control_flow_context(self.outer_context) + self._pivot.op._set_control_flow_context(self.outer_context) + # pylint: enable=protected-access @property def pred(self): diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index b366ed30f3..b758ceaab0 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -2041,29 +2041,24 @@ class MetaGraphTest(test.TestCase): self._testGraphExtensionRestore(test_dir) self._testRestoreFromTrainGraphWithControlContext(test_dir) - def testNestedWhileLoops(self): - test_dir = self._get_test_dir("nested_whiles") + def _testWhileLoopAndGradientSerDes(self, outer_body_fn): + # Build a while loop with `outer_body_fn`, export it, and verify that it can + # be imported and the gradient can be built and run correctly. 
+ + test_dir = self._get_test_dir("nested_control_flow") filename = os.path.join(test_dir, "metafile") saver_ckpt = os.path.join(test_dir, "saver.ckpt") - # Create two simple nested while loops. + # Create while loop using `outer_body_fn`. with ops_lib.Graph().as_default(): - def body(i, x): - _, r = control_flow_ops.while_loop(lambda j, y: j < 3, - lambda j, y: (j + 1, y + x), - [0, 0]) - return i + 1, x + r - var = variables.Variable(0) var_name = var.name - - _, output = control_flow_ops.while_loop(lambda i, x: i < 5, body, + _, output = control_flow_ops.while_loop(lambda i, x: i < 5, outer_body_fn, [0, var]) output_name = output.name - init_op = variables.global_variables_initializer() - # Generate a MetaGraphDef containing the nested loops. + # Generate a MetaGraphDef containing the while loop. with session.Session() as sess: sess.run(init_op) sess.run(output) @@ -2071,8 +2066,8 @@ class MetaGraphTest(test.TestCase): saver.save(sess, saver_ckpt) saver.export_meta_graph(filename) - # Build and run the gradients of the nested while loop. We use this below - # to verify that the gradients are correct with an imported MetaGraphDef. + # Build and run the gradients of the while loop. We use this below to + # verify that the gradients are correct with an imported MetaGraphDef. grad = gradients_impl.gradients([output], [var]) with session.Session() as sess: sess.run(init_op) @@ -2096,6 +2091,30 @@ class MetaGraphTest(test.TestCase): actual_grad_value = sess.run(grad) self.assertEqual(expected_grad_value, actual_grad_value) + def testNestedWhileLoopsSerDes(self): + # Test two simple nested while loops. + def body(i, x): + _, r = control_flow_ops.while_loop(lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0]) + return i + 1, x + r + self._testWhileLoopAndGradientSerDes(body) + + def testNestedControlFlowSerDes(self): + # Test while loop in a cond in a while loop. 
+ # pylint: disable=g-long-lambda + def body(i, x): + cond_result = control_flow_ops.cond( + i > 0, + lambda: control_flow_ops.while_loop( + lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0])[1], + lambda: x) + return i + 1, cond_result + # pylint: enable=g-long-lambda + self._testWhileLoopAndGradientSerDes(body) + def testStrippedOpListDef(self): with self.test_session(): # Creates a graph. -- GitLab From 7b15f7a55dcd5e908211e86ec42b49136b1ccc25 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 20:21:07 -0800 Subject: [PATCH 121/311] Add helpers to stream data from the GCE VM to a Cloud TPU. PiperOrigin-RevId: 187122870 --- tensorflow/contrib/tpu/BUILD | 28 +++ tensorflow/contrib/tpu/python/tpu/datasets.py | 192 ++++++++++++++++++ .../contrib/tpu/python/tpu/datasets_test.py | 181 +++++++++++++++++ 3 files changed, 401 insertions(+) create mode 100644 tensorflow/contrib/tpu/python/tpu/datasets.py create mode 100644 tensorflow/contrib/tpu/python/tpu/datasets_test.py diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index c48e84ddfa..095b4821f1 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -163,6 +163,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":datasets", ":profiler", ":tpu_py", "//tensorflow/contrib/tpu/proto:topology_proto_py", @@ -181,6 +182,33 @@ py_library( ], ) +py_library( + name = "datasets", + srcs = [ + "python/tpu/datasets.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:function", + "//tensorflow/python:functional_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/ops:readers", + ], +) + +tf_py_test( + name = "datasets_test", + srcs = ["python/tpu/datasets_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + ":datasets", + ], + grpc_enabled = 
True, +) + tf_py_test( name = "tpu_test", size = "small", diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py new file mode 100644 index 0000000000..29aea98542 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -0,0 +1,192 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ====================================== +"""Library of Cloud TPU helper functions for data loading.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import interleave_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.ops import readers +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.framework import ops +from tensorflow.python.ops import functional_ops + + +def _TextLineDataset(filename): + buffer_size = 8 * 1024 * 1024 # 8 MiB per file + dataset = readers.TextLineDataset(filename, buffer_size=buffer_size) + return dataset + + +def _TFRecordDataset(filename): + buffer_size = 8 * 1024 * 1024 # 8 MiB per file + dataset = readers.TFRecordDataset(filename, buffer_size=buffer_size) + return dataset + + +_FILETYPE_MAP = { + 'tfrecord': 
_TFRecordDataset, + 'textline': _TextLineDataset, + 'text': _TextLineDataset, +} + + +def StreamingFilesDataset(files, + filetype=None, + file_reader_job=None, + worker_job=None, + num_epochs=None, + filename_shuffle_buffer_size=None, + num_parallel_reads=None, + batch_transfer_size=None, + sloppy=None): + """StreamingFilesDataset constructs a dataset to stream from workers (GCE VM). + + Because Cloud TPUs are allocated over the network, a Cloud TPU cannot read + files local to your GCE VM. In order to train using files stored on your local + VM (e.g. on local SSD for extreme performance), use the StreamingFilesDataset + helper to generate a dataset to feed your Cloud TPU with files from your GCE + VM. + + The resulting dataset may return an OutOfRangeError if there are no files + found as a result of the fileglob expansion. + + Note: StreamingFilesDataset assumes that the session is using a + TPUClusterResolver and has therefore a worker and a coordinator job. File + loading will be done on the coordinator job. + + Args: + files: A string glob to match files, or a `tf.data.Dataset` generating file + names. + filetype: A string (one of 'tfrecord', or 'textline') or a single-argument + TensorFlow function that when given a filename returns a dataset. + file_reader_job: An optional string that corresponds to the job that should + perform the file reads. + worker_job: An optional string that corresponds to the job that should + process the tensors (i.e. your GPU or TPU worker). + num_epochs: The number of epochs through the training set that should be + generated. By default, it will repeat infinitely. + filename_shuffle_buffer_size: An optional integer whose value controls the + shuffling of the file names. If you would like to read from the files in + the same order, set to 0 or False. + num_parallel_reads: An optional integer controlling the number of files to + read from concurrently. (Set to 1 for no parallelism.) 
+ batch_transfer_size: An optional integer controlling the batching used to + amortize the remote function invocation overhead. Set to a very large + number to increase throughput. Set to a very small number to reduce memory + consumption. Set to False to skip batching. + sloppy: (Optional.) If `True`, read input data as fast as possible, without + maintaining a deterministic order. Defaults to `False`. + Returns: + A `tf.data.Dataset` with an infinite stream of elements generated by a + parallel interleaving of the set of files matched (or generated) by `files` + with a type is the output of the dataset specified by `filetype`. + + Raises: + ValueError: if any argument is not of the expected type. + """ + if filetype is None: + filetype = 'tfrecord' + + if isinstance(filetype, str): + if filetype not in _FILETYPE_MAP: + raise ValueError('Unexpected filetype: %s' % filetype) + reader_fn = _FILETYPE_MAP[filetype] + elif callable(filetype): + reader_fn = filetype + else: + raise ValueError('filetype should be a string or a callable') + + file_reader_job = file_reader_job or 'coordinator' + + worker_job = worker_job or 'worker' + + if filename_shuffle_buffer_size is None: + filename_shuffle_buffer_size = 4096 + + num_parallel_reads = num_parallel_reads or 8 + + if batch_transfer_size is None: + batch_transfer_size = 1024 + + if sloppy is None: + sloppy = False + + with ops.device('/job:%s' % file_reader_job): + if isinstance(files, str): + source_dataset = dataset_ops.Dataset.list_files(files) + elif isinstance(files, dataset_ops.Dataset): + source_dataset = files + else: + raise ValueError('files was not a string or a dataset: %s' % files) + + if filename_shuffle_buffer_size: + source_dataset = source_dataset.shuffle( + buffer_size=filename_shuffle_buffer_size) + + # NOTE: We perform the `repeat` on the source dataset, because the output + # dataset does not currently have enough information to recreate an iterator + # over the source dataset when it reaches the end. 
+ source_dataset = source_dataset.repeat(num_epochs) + + source_dataset = source_dataset.apply( + interleave_ops.parallel_interleave( + reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy)) + + if batch_transfer_size: + # Note: we can safely call batch_and_drop_remainder because we have an + # infinite stream of TFRecords. + source_dataset = source_dataset.apply( + batching.batch_and_drop_remainder(batch_transfer_size)) + + source_dataset = source_dataset.prefetch(1) + + source_iterator = source_dataset.make_one_shot_iterator() + source_handle = source_iterator.string_handle() + + @function.Defun(dtypes.string) + def LoadingFunc(h): + remote_iterator = iterator_ops.Iterator.from_string_handle( + h, source_dataset.output_types, source_dataset.output_shapes) + return remote_iterator.get_next() + + def MapFn(unused_input): + return functional_ops.remote_call( + args=[source_handle], + Tout=[dtypes.string], + f=LoadingFunc, + target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job) + + with ops.device('/job:%s' % worker_job): + # TODO(saeta,mrry): Switch to using _GeneratorDataset. + + # identity = lambda x: x + # dummy = constant_op.constant(0) + # output_dataset = dataset_ops._GeneratorDataset(dummy, identity, MapFn, + # identity) + + output_dataset = dataset_ops.Dataset.range(2).repeat().map(MapFn) + output_dataset = output_dataset.prefetch(1) + + if batch_transfer_size: + # Undo the batching used during the transfer. + output_dataset = output_dataset.apply(batching.unbatch()).prefetch(1) + + return output_dataset diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py new file mode 100644 index 0000000000..2c40797792 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -0,0 +1,181 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TPU datasets tests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.contrib.tpu.python.tpu import datasets +from tensorflow.core.protobuf import cluster_pb2 +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.client import session +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import readers +from tensorflow.python.lib.io import python_io +from tensorflow.python.platform import test +from tensorflow.python.training import server_lib +from tensorflow.python.util import compat + +_NUM_FILES = 10 +_NUM_ENTRIES = 200 + + +class DatasetsTest(test.TestCase): + + def setUp(self): + super(DatasetsTest, self).setUp() + self._coord = server_lib.Server.create_local_server() + self._worker = server_lib.Server.create_local_server() + + self._cluster_def = cluster_pb2.ClusterDef() + worker_job = self._cluster_def.job.add() + worker_job.name = 'worker' + worker_job.tasks[0] = self._worker.target[len('grpc://'):] + coord_job = self._cluster_def.job.add() + coord_job.name = 'coordinator' + coord_job.tasks[0] = self._coord.target[len('grpc://'):] + + session_config = config_pb2.ConfigProto(cluster_def=self._cluster_def) + + self._sess = session.Session(self._worker.target, config=session_config) + 
+ def testTextLineDataset(self): + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'text_line.%d.txt' % i) + contents = [] + for j in range(_NUM_ENTRIES): + contents.append(compat.as_bytes('%d: %d' % (i, j))) + with open(filename, 'wb') as f: + f.write(b'\n'.join(contents)) + all_contents.extend(contents) + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'text_line.*.txt'), filetype='text') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testTFRecordDataset(self): + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i) + writer = python_io.TFRecordWriter(filename) + for j in range(_NUM_ENTRIES): + record = compat.as_bytes('Record %d of file %d' % (j, i)) + writer.write(record) + all_contents.append(record) + writer.close() + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'tf_record*'), filetype='tfrecord') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testTFRecordDatasetFromDataset(self): + filenames = [] + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i) + filenames.append(filename) + writer = python_io.TFRecordWriter(filename) + for j in range(_NUM_ENTRIES): + record = compat.as_bytes('Record %d of file %d' % (j, i)) + writer.write(record) + all_contents.append(record) 
+ writer.close() + + filenames = dataset_ops.Dataset.from_tensor_slices(filenames) + + dataset = datasets.StreamingFilesDataset(filenames, filetype='tfrecord') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testArbitraryReaderFunc(self): + + def MakeRecord(i, j): + return compat.as_bytes('%04d-%04d' % (i, j)) + + record_bytes = len(MakeRecord(10, 200)) + + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'fixed_length.%d' % i) + with open(filename, 'wb') as f: + for j in range(_NUM_ENTRIES): + record = MakeRecord(i, j) + f.write(record) + all_contents.append(record) + + def FixedLengthFile(filename): + return readers.FixedLengthRecordDataset(filename, record_bytes) + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'fixed_length*'), + filetype=FixedLengthFile) + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testUnexpectedFiletypeString(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), '*'), filetype='foo') + + def testUnexpectedFiletypeType(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), '*'), filetype=3) + + def testUnexpectedFilesType(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset(123, filetype='tfrecord') + + +if __name__ == '__main__': + test.main() -- GitLab From 
557611cefba99a7c94dc7dd0932723c0a9f96087 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 21:09:30 -0800 Subject: [PATCH 122/311] Automated g4 rollback of changelist 187092622 PiperOrigin-RevId: 187125995 --- tensorflow/c/eager/BUILD | 1 - tensorflow/c/eager/c_api.cc | 4 ++-- tensorflow/c/eager/c_api_internal.h | 14 +------------- tensorflow/c/eager/runtime.cc | 14 ++++---------- tensorflow/c/eager/runtime.h | 3 --- tensorflow/c/eager/runtime_test.cc | 12 ++++++------ 6 files changed, 13 insertions(+), 35 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 16a2a15072..e55cb672e9 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -21,7 +21,6 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:lib", "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index b233dd5b93..bebb63c746 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -818,8 +818,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // See WARNING comment below - would be nice to rework to avoid this // subtlety. tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = tensorflow::KernelAndDevice::Init( - ndef, ctx->func_lib(device), &ctx->runner, kernel); + status->status = + tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); if (!status->status.ok()) { delete kernel; return; diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 29944df4c2..3356054cd0 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -31,7 +31,6 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" -#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/platform/mutex.h" @@ -46,15 +45,7 @@ struct TFE_ContextOptions { struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) - : thread_pool(new tensorflow::thread::ThreadPool( - opts.session_options.options.env, "EagerCompute", - opts.session_options.options.config - .inter_op_parallelism_threads() != 0 - ? opts.session_options.options.config - .inter_op_parallelism_threads() - : tensorflow::port::NumSchedulableCPUs())), - runner([this](std::function f) { thread_pool->Schedule(f); }), - policy(opts.policy), + : policy(opts.policy), session(s), rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( @@ -63,9 +54,6 @@ struct TFE_Context { log_device_placement( opts.session_options.options.config.log_device_placement()) {} - const std::unique_ptr thread_pool; - std::function)> runner; - const TFE_ContextDevicePlacementPolicy policy; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index b9618420f0..4bf24fec2c 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -255,22 +255,17 @@ Status KernelAndDevice::InitOp(Device* device, const NodeDef& ndef, out->device_ = device; out->kernel_.reset(k); out->flib_ = nullptr; - out->runner_ = nullptr; - out->default_runner_ = [](std::function f) { f(); }; return s; } // static Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, - std::function)>* runner, KernelAndDevice* out) { OpKernel* k = nullptr; Status s = flib->CreateKernel(ndef, &k); out->device_ = 
flib->device(); out->kernel_.reset(k); out->flib_ = flib; - out->runner_ = runner; - out->default_runner_ = [](std::function f) { f(); }; return s; } @@ -301,11 +296,10 @@ Status KernelAndDevice::Run(std::vector* input_tensors, if (stats != nullptr) { params.track_allocations = true; } - if (runner_ == nullptr) { - params.runner = &default_runner_; - } else { - params.runner = runner_; - } + // TODO(apassos): use a thread pool. + std::function)> runner = + [](std::function f) { f(); }; + params.runner = &runner; OpKernelContext context(¶ms); device_->Compute(kernel_.get(), &context); diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index fa5f839977..7fede4dae9 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -169,7 +169,6 @@ class KernelAndDevice { // the FunctionLibraryRuntime is pushed on to the caller (see locking in // c_api.cc). static Status Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, - std::function)>* runner, KernelAndDevice* out); // TODO(ashankar): Remove this static Status InitOp(Device* device, const NodeDef& ndef, @@ -189,8 +188,6 @@ class KernelAndDevice { private: std::unique_ptr kernel_; Device* device_; - std::function)>* runner_; - std::function)> default_runner_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc index ab0b535e1a..643153058c 100644 --- a/tensorflow/c/eager/runtime_test.cc +++ b/tensorflow/c/eager/runtime_test.cc @@ -92,8 +92,8 @@ TEST(KernelAndDevice, Run) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - Status s = KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &kernel); + Status s = + KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel); ASSERT_TRUE(s.ok()) << s; std::vector outputs; s = kernel.Run(&inputs, &outputs, nullptr); @@ -158,8 +158,8 @@ void 
BM_KernelAndDeviceInit(int iters) { KernelAndDevice k(nullptr); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { - TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &k)); + TF_CHECK_OK( + KernelAndDevice::Init(ndef, env.function_library_runtime(), &k)); } } BENCHMARK(BM_KernelAndDeviceInit); @@ -179,8 +179,8 @@ void BM_KernelAndDeviceRun(int iters) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &kernel)); + TF_CHECK_OK( + KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel)); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr)); -- GitLab From 46306ad7bd02c613a59aa6074f830f0de011cfbf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 21:25:22 -0800 Subject: [PATCH 123/311] Improve error handling in strided_slice_op to fail more gracefully and return an error status instead of crashing. PiperOrigin-RevId: 187126888 --- tensorflow/core/kernels/strided_slice_op.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 7745effe2a..1e3e92a68a 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -109,17 +109,27 @@ class StridedSliceOp : public OpKernel { if (is_identity) { VLOG(1) << "Strided slice identity "; Tensor tmp; - CHECK(tmp.CopyFrom(input, final_shape)); + OP_REQUIRES(context, tmp.CopyFrom(input, final_shape), + errors::Internal("Copy failed")); context->set_output(0, tmp); return; } // Optimization #2, slice is memory contiguous (only occurs in dim 0) if (slice_dim0 && IsDim0SliceAligned(input.shape(), begin[0], end[0])) { - CHECK_GE(input.dims(), 1); // Otherwise, is_identity should be true. 
+ OP_REQUIRES(context, input.dims() >= 1, + errors::InvalidArgument( + "Input must have rank at least 1, got: ", input.dims())); + // Otherwise, is_identity should be true. VLOG(1) << "Strided slice dim 0: " << input.shape().DebugString(); + OP_REQUIRES( + context, begin[0] <= end[0], + errors::InvalidArgument("begin[0] (", begin[0], + ") must less or equal to end[0] (", end[0])); + Tensor slice = input.Slice(begin[0], end[0]); Tensor tmp; - CHECK(tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)); + OP_REQUIRES(context, tmp.CopyFrom(slice, final_shape), + errors::Internal("Copy failed")); context->set_output(0, tmp); return; } @@ -238,7 +248,8 @@ class StridedSliceGradOp : public OpKernel { if (processing_shape.dims() == 0) { auto in = context->input(4); - CHECK(result->CopyFrom(in, processing_shape)); + OP_REQUIRES(context, result->CopyFrom(in, processing_shape), + errors::Internal("Copy failed")); return; } -- GitLab From 129bb5400e20b322016c4a8f378da63be8d58e5e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 01:02:36 -0800 Subject: [PATCH 124/311] Add documentation to Grappler RewriterConfig to give a short description for each of the optimizer on what they do. PiperOrigin-RevId: 187143156 --- tensorflow/core/protobuf/rewriter_config.proto | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 504ed5d819..875e4663db 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -30,12 +30,17 @@ message RewriterConfig { } // Optimize tensor layouts (default is ON) + // e.g. This will try to use NCHW layout on GPU which is faster. Toggle layout_optimizer = 1; // Fold constants (default is ON) + // Statically infer the value of tensors when possible, and materialize the + // result using constants. 
Toggle constant_folding = 3; // Arithmetic optimizations (default is ON) + // e.g. Simplify arithmetic ops; merge ops with same value (like constants). Toggle arithmetic_optimization = 7; // Control dependency optimizations (default is ON). + // Remove redundant control dependencies, which may enable other optimization. Toggle dependency_optimization = 8; // Loop optimizations (default is OFF). Toggle loop_optimization = 9; @@ -49,12 +54,20 @@ message RewriterConfig { NO_MEM_OPT = 1; // Driven by manual op-level annotations. MANUAL = 2; + // Driven by heuristics. The behavior of these heuristics is subject to // change. Currently includes an experimental recomputation and swapping // heuristics. Manual annotations are respected, but additional nodes are // selected automatically. + + // Swapping heuristic will move a tensor from the GPU to the CPU and move + // it back when needed to reduce peak memory usage. SWAPPING_HEURISTICS = 4; + // Recomputation heuristics will recompute ops (such as Relu activation) + // during backprop instead of storing them, reducing peak memory usage. RECOMPUTATION_HEURISTICS = 5; + // Scheduling will split big ops such as AddN and try to enforce a schedule + // of the new computations that decreases peak memory usage. SCHEDULING_HEURISTICS = 6; // Use any combination of swapping and recomputation heuristics. HEURISTICS = 3; -- GitLab From efa9a8ec649c72887cd286a78b3a2bf95e34f924 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 06:00:21 -0800 Subject: [PATCH 125/311] Enable dynamic function calls. These are compiled just in time by inserting a call to compile. 
PiperOrigin-RevId: 187165096 --- tensorflow/contrib/py2tf/__init__.py | 4 +- tensorflow/contrib/py2tf/converters/BUILD | 13 +-- .../contrib/py2tf/converters/call_trees.py | 76 +++++++------- .../py2tf/converters/call_trees_test.py | 16 +++ .../py2tf/converters/converter_test_base.py | 32 ++++-- tensorflow/contrib/py2tf/impl/api.py | 99 ++++++++++++++----- 6 files changed, 163 insertions(+), 77 deletions(-) diff --git a/tensorflow/contrib/py2tf/__init__.py b/tensorflow/contrib/py2tf/__init__.py index 379fa7fd5c..6531183cb5 100644 --- a/tensorflow/contrib/py2tf/__init__.py +++ b/tensorflow/contrib/py2tf/__init__.py @@ -23,6 +23,7 @@ from __future__ import print_function from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl.api import convert +from tensorflow.contrib.py2tf.impl.api import converted_call from tensorflow.contrib.py2tf.impl.api import graph_ready from tensorflow.contrib.py2tf.impl.api import to_code from tensorflow.contrib.py2tf.impl.api import to_graph @@ -30,7 +31,8 @@ from tensorflow.contrib.py2tf.pyct.transformer import PyFlowParseError from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'to_graph', 'to_code', 'convert', 'graph_ready', 'utils', 'PyFlowParseError' + 'to_graph', 'to_code', 'convert', 'graph_ready', 'converted_call', 'utils', + 'PyFlowParseError' ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 42baaaaba7..78f46bc05f 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -46,6 +46,7 @@ py_library( visibility = ["//tensorflow:__subpackages__"], deps = [ ":converters", + "//tensorflow/contrib/py2tf/pyct", "//tensorflow/contrib/py2tf/pyct/static_analysis", "//tensorflow/contrib/py2tf/utils", "@gast_archive//:gast", @@ -59,7 +60,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", 
"//tensorflow/python:client_testlib", ], ) @@ -70,7 +70,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -81,7 +80,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -92,7 +90,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/py2tf/impl", "//tensorflow/python:client_testlib", ], ) @@ -103,7 +101,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -114,7 +111,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -125,7 +121,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -136,7 +131,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -157,7 +151,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -168,7 +161,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -184,7 +176,6 @@ py_test( ], deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) diff --git a/tensorflow/contrib/py2tf/converters/call_trees.py b/tensorflow/contrib/py2tf/converters/call_trees.py index 1050ba654c..f18f9f6086 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees.py +++ b/tensorflow/contrib/py2tf/converters/call_trees.py @@ -27,6 +27,7 @@ import types import gast from tensorflow.contrib.py2tf.pyct import anno +from 
tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.contrib.py2tf.pyct import parser from tensorflow.contrib.py2tf.pyct import templates from tensorflow.contrib.py2tf.pyct import transformer @@ -72,9 +73,8 @@ class CallTreeTransformer(transformer.Base): self.uncompiled_modules = uncompiled_modules self.nocompile_decorators = nocompile_decorators - # pylint:disable=invalid-name - def _resolve_name(self, node): + """Used to resolve decorator info.""" if isinstance(node, gast.Call): return self._resolve_name(node.func) if isinstance(node, gast.Name): @@ -99,7 +99,13 @@ class CallTreeTransformer(transformer.Base): (owner_type, node.attr)) return None + def _function_is_compilable(self, target_entity): + """Determines whether an entity can be compiled at all.""" + # TODO(mdan): This is just a placeholder. Implement. + return not isinstance(target_entity, types.BuiltinFunctionType) + def _should_compile(self, node, fqn): + """Determines whether an entity should be compiled in the context.""" for i in range(1, len(fqn)): if fqn[:i] in self.uncompiled_modules: return False @@ -141,33 +147,6 @@ class CallTreeTransformer(transformer.Base): return True - def _determine_function_owner(self, m): - # TODO(mdan): The parent type should be known at analysis. Use that instead. - if hasattr(m, 'im_class'): # Python 2 - return m.im_class - if hasattr(m, '__qualname__'): # Python 3 - # Object attributes: should be bound to "self". - if hasattr(m, '__self__'): - return type(m.__self__) - - # Class attributes: should have the owner name in their namespace. - qn = m.__qualname__.split('.') - if len(qn) < 2: - return None - owner_name, func_name = qn[-2:] - if func_name != m.__name__: - raise ValueError('Inconsistent names detected ' - '(__qualname__[1] = "%s", __name__ = "%s") for %s.' % - (func_name, m.__name__, m)) - if owner_name == '': - return None - if owner_name not in self.context.namespace: - raise ValueError( - 'Could not resolve name "%s" while analyzing %s. 
Namespace:\n%s' % - (owner_name, m, self.context.namespace)) - return self.context.namespace[owner_name] - return None - def _rename_compilable_function(self, node): assert anno.hasanno(node.func, 'live_val') assert anno.hasanno(node.func, 'fqn') @@ -182,7 +161,11 @@ class CallTreeTransformer(transformer.Base): target_fqn, live_entity=target_entity) do_rename = True else: - owner_type = self._determine_function_owner(target_entity) + if anno.hasanno(node.func, 'parent_type'): + owner_type = anno.getanno(node.func, 'parent_type') + else: + # Fallback - not reliable. + owner_type = inspect_utils.getmethodclass(target_entity) new_name, do_rename = self.context.namer.compiled_function_name( target_fqn, live_entity=target_entity, owner_type=owner_type) @@ -202,9 +185,32 @@ class CallTreeTransformer(transformer.Base): """ return templates.replace(template, func=node.func, original_args=node.args) - def _function_is_compilable(self, target_entity): - # TODO(mdan): This is just a placeholder. Implement. - return not isinstance(target_entity, types.BuiltinFunctionType) + def _converted_call(self, node): + """Inlines a dynamic conversion for a dynamic function.""" + # TODO(mdan): Pass information on the statically compiled functions. + # Having access to the statically compiled functions can help avoid + # unnecessary compilation. + # For example, this would lead to function `a` being compiled twice: + # + # def a(): + # v = b + # b() + # def b(): + # a() + # + # This is really a problem with recursive calls, which currently can + # only be gated by a static condition, and should be rare. + # TODO(mdan): It probably makes sense to use dynamic conversion every time. + # Before we could convert all the time though, we'd need a reasonable + # caching mechanism. 
+ template = """ + py2tf_api.converted_call(func, True, False, {}, original_args) + """ + call_expr = templates.replace( + template, func=node.func, original_args=node.args) + return call_expr[0].value + + # pylint:disable=invalid-name def visit_Expr(self, node): if isinstance(node.value, gast.Call): @@ -245,9 +251,9 @@ class CallTreeTransformer(transformer.Base): raise NotImplementedError('py_func with return values') else: if self.context.recursive: - raise NotImplementedError('Could not resolve target function.') + node = self._converted_call(node) else: - # TODO(mdan): Double check. Is this reachable code? + # Unresolved functions are allowed in non-recursive mode. pass return node diff --git a/tensorflow/contrib/py2tf/converters/call_trees_test.py b/tensorflow/contrib/py2tf/converters/call_trees_test.py index 777648dc0b..d482a9ef78 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees_test.py +++ b/tensorflow/contrib/py2tf/converters/call_trees_test.py @@ -47,6 +47,21 @@ class CallTreesTest(converter_test_base.TestCase): result.renamed_test_fn_1 = renamed_test_fn_1 self.assertEquals(3, result.test_fn_2(1)) + def test_dynamic_function(self): + + def test_fn_1(): + raise ValueError('This should be masked by the mock.') + + def test_fn_2(f): + return f() + 3 + + node = self.parse_and_analyze(test_fn_2, {}) + node = call_trees.transform(node, self.ctx, (), ()) + + with self.compiled(node) as result: + # 10 = 7 (from the mock) + 3 (from test_fn_2) + self.assertEquals(10, result.test_fn_2(test_fn_1)) + def test_simple_methods(self): class TestClass(object): @@ -59,6 +74,7 @@ class CallTreesTest(converter_test_base.TestCase): node = self.parse_and_analyze( TestClass.test_fn_2, {'TestClass': TestClass}, + namer=converter_test_base.FakeNoRenameNamer(), arg_types={'self': (TestClass.__name__, TestClass)}) node = call_trees.transform(node, self.ctx, (), ()) diff --git a/tensorflow/contrib/py2tf/converters/converter_test_base.py 
b/tensorflow/contrib/py2tf/converters/converter_test_base.py index afa5c2f96f..1f98d8469c 100644 --- a/tensorflow/contrib/py2tf/converters/converter_test_base.py +++ b/tensorflow/contrib/py2tf/converters/converter_test_base.py @@ -25,6 +25,7 @@ from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.pyct import compiler from tensorflow.contrib.py2tf.pyct import context from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.pyct import pretty_printer from tensorflow.contrib.py2tf.pyct import qual_names from tensorflow.contrib.py2tf.pyct.static_analysis import activity from tensorflow.contrib.py2tf.pyct.static_analysis import live_values @@ -52,26 +53,43 @@ class FakeNamer(object): return ('renamed_%s' % '_'.join(original_fqn)), True +class FakeNoRenameNamer(FakeNamer): + + def compiled_function_name(self, original_fqn, **_): + return str(original_fqn), False + + class TestCase(test.TestCase): """Base class for unit tests in this module. Contains relevant utilities.""" @contextlib.contextmanager def compiled(self, node, *symbols): - source = '' + source = None + + self.dynamic_calls = [] + def converted_call(*args): + """Mock version of api.converted_call.""" + self.dynamic_calls.append(args) + return 7 + try: result, source = compiler.ast_to_object(node) - result.tf = self.make_fake_tf(*symbols) + result.tf = self.make_fake_mod('fake_tf', *symbols) result.py2tf_utils = utils + result.py2tf_api = self.make_fake_mod('fake_api', converted_call) yield result except Exception: # pylint:disable=broad-except - print('Offending compiled code:\n%s' % source) + if source is None: + print('Offending AST:\n%s' % pretty_printer.fmt(node, color=False)) + else: + print('Offending compiled code:\n%s' % source) raise - def make_fake_tf(self, *symbols): - fake_tf = imp.new_module('fake_tf') + def make_fake_mod(self, name, *symbols): + fake_mod = imp.new_module(name) for s in symbols: - setattr(fake_tf, s.__name__, s) - return fake_tf + 
setattr(fake_mod, s.__name__, s) + return fake_mod def attach_namespace(self, module, **ns): for k, v in ns.items(): diff --git a/tensorflow/contrib/py2tf/impl/api.py b/tensorflow/contrib/py2tf/impl/api.py index 29d2e038a7..48100aac32 100644 --- a/tensorflow/contrib/py2tf/impl/api.py +++ b/tensorflow/contrib/py2tf/impl/api.py @@ -26,7 +26,9 @@ import six from tensorflow.contrib.py2tf.impl import config from tensorflow.contrib.py2tf.impl import conversion from tensorflow.contrib.py2tf.pyct import compiler +from tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.utils import builtins from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_inspect @@ -110,28 +112,7 @@ def convert(recursive=False, verbose=False, arg_types=None): @wraps(f) def wrapper(*args, **kwargs): - """Wrapper that calls the compiled version of the wrapped function.""" - partial_types = () - arg_values = {} - arg_names = tf_inspect.getargspec(f)[0] - for name, arg in zip(arg_names, args): - arg_values[name] = arg - arg_class = arg.__class__ - # If arg_value_hints specifies any name, use that instead. - if name not in arg_types: - arg_types[name] = (arg_class.__name__, arg_class) - if name == 'self' and tf_inspect.isclass(arg_class): - # Annotated methods need to specify that their owner type is partial, - # otherwise other members they call will not be converted. - partial_types = (arg_class,) - wrapped = to_graph( - f, - recursive=recursive, - verbose=verbose, - arg_values=arg_values, - arg_types=arg_types, - partial_types=partial_types) - return wrapped(*args, **kwargs) + return converted_call(f, recursive, verbose, arg_types, *args, **kwargs) # Sometimes the decorator is just desugared, making it impossible to detect. # This attribute makes detection easier. 
@@ -141,6 +122,78 @@ def convert(recursive=False, verbose=False, arg_types=None): return decorator +def converted_call(f, recursive, verbose, arg_types, *args, **kwargs): + """Compiles a function call inline.""" + # TODO(mdan): This needs cleanup. + # In particular, we may want to avoid renaming functions altogether. + + if conversion.is_whitelisted_for_graph(f): + return f(*args, **kwargs) + + unknown_arg_value = object() # Sentinel for arguments of unknown value + + if tf_inspect.isbuiltin(f): + return builtins.dynamic_builtin(f, *args, **kwargs) + + if tf_inspect.isfunction(f) or tf_inspect.ismethod(f): + # Regular functions + target_entity = f + arg_map_target = f + effective_args = args + f_class = inspect_utils.getmethodclass(f) + + if f_class is not None: + partial_types = (f_class,) + else: + partial_types = () + + elif tf_inspect.isclass(f): + # Constructors + target_entity = f + arg_map_target = f.__init__ + effective_args = (unknown_arg_value,) + args + partial_types = () + + elif hasattr(f, '__call__') and hasattr(f, '__class__'): + # Callable objects + target_entity = f.__call__ + arg_map_target = f.__call__ + effective_args = (f,) + args + partial_types = (f.__class__,) + + else: + NotImplementedError('unknown callable type "%s"' % type(f)) + + arg_values = tf_inspect.getcallargs(arg_map_target, *args, **kwargs) + for name, arg in arg_values.items(): + if arg is unknown_arg_value: + continue + arg_class = arg.__class__ + # If arg_value_hints specifies any name, use that instead. + if name not in arg_types: + arg_types[name] = (arg_class.__name__, arg_class) + + # When called from within a decorator, this is the only indication that + # the function is a method - it appears that the decorator is applied + # before the method is bound. 
+ if not partial_types: + if 'self' in arg_values: + if tf_inspect.isclass(arg_values['self'].__class__): + partial_types = (arg_values['self'].__class__,) + elif 'cls' in arg_values: + if tf_inspect.isclass(arg_values['cls']): + partial_types = (arg_values['cls'],) + + converted_f = to_graph( + target_entity, + recursive=recursive, + verbose=verbose, + arg_values=arg_values, + arg_types=arg_types, + partial_types=partial_types) + return converted_f(*effective_args, **kwargs) + + def to_graph(e, recursive=True, verbose=False, @@ -189,7 +242,7 @@ def to_graph(e, # The compiled code should see everything the entry function saw. # TODO(mdan): This might not work well if the call tree spans modules? if tf_inspect.isfunction(e): - compiled_node.__dict__.update(six.get_function_globals(e)) + compiled_node.__dict__.update(inspect_utils.getnamespace(e)) compiled_fn = getattr(compiled_node, name) if verbose: -- GitLab From 7f53659bc67bba5567ea3f0b69710329843e0228 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 27 Feb 2018 10:19:08 -0800 Subject: [PATCH 126/311] Bump the version of CUB in cmake build. 
--- tensorflow/contrib/cmake/external/cub.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/external/cub.cmake b/tensorflow/contrib/cmake/external/cub.cmake index 8368898955..98a8c7e736 100644 --- a/tensorflow/contrib/cmake/external/cub.cmake +++ b/tensorflow/contrib/cmake/external/cub.cmake @@ -14,8 +14,8 @@ # ============================================================================== include (ExternalProject) -set(cub_URL https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.4.zip) -set(cub_HASH SHA256=20a1a39fd97e5da7f40f5f2e7fd73fd2ea59f9dc4bb8a6c5f228aa543e727e31) +set(cub_URL https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.8.0.zip) +set(cub_HASH SHA256=6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3) set(cub_BUILD ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_ARCHIVE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/cub_archive) -- GitLab From 246cad289498357523517b67a3f214960dfa0f92 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 27 Feb 2018 14:32:57 -0800 Subject: [PATCH 127/311] "soft placement" for eager PiperOrigin-RevId: 187233434 --- tensorflow/c/eager/c_api.cc | 69 ++++++++++++++++++++++++++--- tensorflow/c/eager/c_api.h | 6 ++- tensorflow/c/eager/c_api_internal.h | 8 +++- tensorflow/c/eager/runtime.h | 2 + tensorflow/python/eager/ops_test.py | 20 +++++++++ 5 files changed, 96 insertions(+), 9 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index bebb63c746..29c709b06d 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -31,6 +31,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/copy_tensor.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/device_set.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/node_def_util.h" @@ -68,6 +69,18 @@ std::atomic_int_fast64_t func_id_generator(0); #endif // TENSORFLOW_EAGER_USE_XLA } // namespace +TFE_ContextDevicePlacementPolicy PlacementPolicy( + bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy) { + if (!soft_placement) { + return original_policy; + } + if (original_policy == TFE_DEVICE_PLACEMENT_EXPLICIT || + original_policy == TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32) { + return TFE_DEVICE_PLACEMENT_SILENT; + } + return original_policy; +} + extern "C" { TFE_ContextOptions* TFE_NewContextOptions() { return new TFE_ContextOptions; } @@ -777,15 +790,38 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { return launch_op; } #endif // TENSORFLOW_EAGER_USE_XLA + +tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, + TFE_Context* ctx, TF_Status* status) { + tensorflow::DeviceSet ds; + for (tensorflow::Device* d : ctx->devices()) { + ds.AddDevice(d); + } + tensorflow::DeviceTypeVector final_devices; + status->status = tensorflow::SupportedDeviceTypesForNode( + ds.PrioritizedDeviceTypeList(), ndef, &final_devices); + if (!status->status.ok()) { + return nullptr; + } + if (final_devices.empty()) { + status->status = tensorflow::errors::Internal( + "Could not find valid device for node ", ndef.DebugString()); + return nullptr; + } + for (tensorflow::Device* d : ctx->devices()) { + if (d->device_type() == final_devices[0].type_string()) { + return d; + } + } + status->status = tensorflow::errors::Unknown( + "Could not find a device for node ", ndef.DebugString()); + return nullptr; +} + } // namespace void 
TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, TF_Status* status) { - TFE_Context* ctx = op->ctx; - // TODO(ashankar): ASSUMPTION: ctx->devices()[0] is always CPU - tensorflow::Device* device = - (op->device == nullptr) ? ctx->devices()[0] : op->device; - #ifdef TENSORFLOW_EAGER_USE_XLA std::unique_ptr xla_launch_op; if (op->use_xla && op->name != "_XlaLaunch") { @@ -797,9 +833,17 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } #endif // TENSORFLOW_EAGER_USE_XLA + TFE_Context* ctx = op->ctx; + tensorflow::Device* device = op->device; + if (!ctx->soft_placement && device == nullptr) { + // TODO(ashankar): ASSUMPTION: ctx->devices()[0] is always CPU + device = ctx->devices()[0]; + } + std::vector outputs(1); const tensorflow::MemoryTypeVector* output_memory_types = nullptr; - tensorflow::Fprint128 cache_key = op->attrs.CacheKey(device->name()); + tensorflow::Fprint128 cache_key = + op->attrs.CacheKey(device == nullptr ? "unspecified" : device->name()); tensorflow::KernelAndDevice* kernel; { tensorflow::tf_shared_lock l(ctx->cache_mu); @@ -807,6 +851,13 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } if (kernel == nullptr) { const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); + if (ctx->soft_placement && device == nullptr) { + device = SelectDevice(ndef, ctx, status); + if (!status->status.ok()) { + return; + } + } + CHECK(device != nullptr); if (ctx->log_device_placement) { LOG(INFO) << "Executing op " << ndef.op() << " in device " << device->name(); @@ -846,6 +897,12 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, tensorflow::mutex_lock ml(ctx->cache_mu); tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel); } + if (device == nullptr) { + // TODO(apassos) debug how the assignment below might return a different + // device from the one requested above. 
+ device = kernel->device(); + } + std::vector copied_tensors; status->status = ValidateInputTypeAndPlacement( ctx, ctx->devices()[0], device, op, kernel->kernel(), &copied_tensors); diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 90cfb7500e..9610ca1b3b 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -61,7 +61,8 @@ TF_CAPI_EXPORT extern void TFE_ContextOptionsSetConfig( // Controls how to act when we try to run an operation on a given device but // some input tensors are not on that device. typedef enum TFE_ContextDevicePlacementPolicy { - // Running operations with input tensors on the wrong device will fail. + // Running operations with input tensors on the wrong device will fail. When + // soft placement is enabled acts like TFE_DEVICE_PLACEMENT_SILENT. TFE_DEVICE_PLACEMENT_EXPLICIT = 0, // Copy the tensor to the right device but log a warning. TFE_DEVICE_PLACEMENT_WARN = 1, @@ -69,7 +70,8 @@ typedef enum TFE_ContextDevicePlacementPolicy { // operation will be blocked till the copy completes. TFE_DEVICE_PLACEMENT_SILENT = 2, // Default placement policy which silently copies int32 tensors but not other - // dtypes. + // dtypes. When soft placement is enabled acts like + // TFE_DEVICE_PLACEMENT_SILENT. 
TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32 = 3, } TFE_ContextDevicePlacementPolicy; diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 3356054cd0..53c21b64cb 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -43,9 +43,14 @@ struct TFE_ContextOptions { TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32}; }; +TFE_ContextDevicePlacementPolicy PlacementPolicy( + bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy); + struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) - : policy(opts.policy), + : soft_placement( + opts.session_options.options.config.allow_soft_placement()), + policy(PlacementPolicy(soft_placement, opts.policy)), session(s), rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( @@ -54,6 +59,7 @@ struct TFE_Context { log_device_placement( opts.session_options.options.config.log_device_placement()) {} + const bool soft_placement; const TFE_ContextDevicePlacementPolicy policy; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 7fede4dae9..985ed96735 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -183,6 +183,8 @@ class KernelAndDevice { const OpKernel* kernel() const { return kernel_.get(); } + Device* device() const { return device_; } + DataTypeVector* output_dtypes() { return &output_dtypes_; } private: diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index f2e70341d9..553571d267 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import numpy as np +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.eager import context from tensorflow.python.eager import execute from 
tensorflow.python.eager import test @@ -277,6 +278,25 @@ class OpsTest(test_util.TensorFlowTestCase): context._context = context.Context() # pylint: enable=protected-access + def testSoftPlacement(self): + if not context.context().num_gpus(): + self.skipTest('No GPUs found') + # Temporarily replace the context + # pylint: disable=protected-access + del context._context + try: + context._context = context.Context( + device_policy=context.DEVICE_PLACEMENT_SILENT, + config=config_pb2.ConfigProto(allow_soft_placement=True)) + cpu_tensor = constant_op.constant(1.0) + result = cpu_tensor + cpu_tensor + self.assertEqual(result.device, + '/job:localhost/replica:0/task:0/device:GPU:0') + finally: + del context._context + context._context = context.Context() + # pylint: enable=protected-access + def testRandomUniform(self): scalar_shape = constant_op.constant([], dtype=dtypes.int32) -- GitLab From 80b6956b7cf4a092ff0780d133cd2faad4cda704 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 14:37:14 -0800 Subject: [PATCH 128/311] Added a TFLite Java API to get last inference latency in nanoseconds. 
PiperOrigin-RevId: 187234119 --- .../lite/NativeInterpreterWrapper.java | 16 +++++++- .../src/main/native/duration_utils_jni.cc | 38 +++++++++++++++++ .../native/nativeinterpreterwrapper_jni.cc | 12 +++++- .../native/nativeinterpreterwrapper_jni.h | 9 +++- .../lite/NativeInterpreterWrapperTest.java | 41 +++++++++++++++++++ .../java/org/tensorflow/lite/TestHelper.java | 15 +++++++ 6 files changed, 126 insertions(+), 5 deletions(-) create mode 100644 tensorflow/contrib/lite/java/src/main/native/duration_utils_jni.cc diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java index 5ee594dec4..7612be0ddd 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java @@ -91,8 +91,9 @@ final class NativeInterpreterWrapper implements AutoCloseable { i, inputs.length)); } } + inferenceDurationNanoseconds = -1; long[] outputsHandles = - run(interpreterHandle, errorHandle, sizes, dataTypes, numsOfBytes, inputs); + run(interpreterHandle, errorHandle, sizes, dataTypes, numsOfBytes, inputs, this); if (outputsHandles == null || outputsHandles.length == 0) { throw new IllegalStateException("Interpreter has no outputs."); } @@ -109,7 +110,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { Object[] sizes, int[] dtypes, int[] numsOfBytes, - Object[] values); + Object[] values, + NativeInterpreterWrapper wrapper); /** Resizes dimensions of a specific input. */ void resizeInput(int idx, int[] dims) { @@ -236,6 +238,14 @@ final class NativeInterpreterWrapper implements AutoCloseable { } } + /** + * Gets the last inference duration in nanoseconds. It returns null if there is no previous + * inference run or the last inference run failed. 
+ */ + Long getLastNativeInferenceDurationNanoseconds() { + return (inferenceDurationNanoseconds < 0) ? null : inferenceDurationNanoseconds; + } + private static final int ERROR_BUFFER_SIZE = 512; private long errorHandle; @@ -246,6 +256,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { private int inputSize; + private long inferenceDurationNanoseconds = -1; + private MappedByteBuffer modelByteBuffer; private Map inputsIndexes; diff --git a/tensorflow/contrib/lite/java/src/main/native/duration_utils_jni.cc b/tensorflow/contrib/lite/java/src/main/native/duration_utils_jni.cc new file mode 100644 index 0000000000..0e08a04370 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/duration_utils_jni.cc @@ -0,0 +1,38 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +namespace tflite { + +// Gets the elapsed wall-clock timespec. +timespec getCurrentTime() { + timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + return time; +} + +// Computes the time diff from two timespecs. Returns '-1' if 'stop' is earlier +// than 'start'. 
+jlong timespec_diff_nanoseconds(struct timespec* start, struct timespec* stop) { + jlong result = stop->tv_sec - start->tv_sec; + if (result < 0) return -1; + result = 1000000000 * result + (stop->tv_nsec - start->tv_nsec); + if (result < 0) return -1; + return result; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index c346f9f92e..e405df0745 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -353,7 +353,7 @@ JNIEXPORT jlongArray JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_run( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jobjectArray sizes, jintArray data_types, jintArray nums_of_bytes, - jobjectArray values) { + jobjectArray values, jobject wrapper) { tflite::Interpreter* interpreter = convertLongToInterpreter(env, interpreter_handle); if (interpreter == nullptr) return nullptr; @@ -384,6 +384,7 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( status = setInputs(env, interpreter, input_size, data_types, nums_of_bytes, values); if (status != kTfLiteOk) return nullptr; + timespec beforeInference = ::tflite::getCurrentTime(); // runs inference if (interpreter->Invoke() != kTfLiteOk) { throwException(env, kIllegalArgumentException, @@ -391,6 +392,15 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( error_reporter->CachedErrorMessage()); return nullptr; } + timespec afterInference = ::tflite::getCurrentTime(); + jclass wrapper_clazz = env->GetObjectClass(wrapper); + jfieldID fid = + env->GetFieldID(wrapper_clazz, "inferenceDurationNanoseconds", "J"); + if (fid != 0) { + env->SetLongField( + wrapper, fid, + ::tflite::timespec_diff_nanoseconds(&beforeInference, &afterInference)); + } // returns outputs const std::vector& results = 
interpreter->outputs(); if (results.empty()) { diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h index c52a7e4e43..31c8f1bc88 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include #include #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/interpreter.h" @@ -28,6 +29,9 @@ limitations under the License. namespace tflite { // This is to be provided at link-time by a library. extern std::unique_ptr CreateOpResolver(); +extern timespec getCurrentTime(); +extern jlong timespec_diff_nanoseconds(struct timespec* start, + struct timespec* stop); } // namespace tflite #ifdef __cplusplus @@ -104,13 +108,14 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( /* * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: - * Signature: (JJ[Ljava/lang/Object;[I[I[Ljava/lang/Object;)[J + * Signature: + * (JJ[Ljava/lang/Object;[I[I[Ljava/lang/Object;Lorg/tensorflow/lite/NativeInterpreterWrapper;)[J */ JNIEXPORT jlongArray JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_run( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jobjectArray sizes, jintArray data_types, jintArray nums_of_bytes, - jobjectArray values); + jobjectArray values, jobject wrapper); /* * Class: org_tensorflow_lite_NativeInterpreterWrapper diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java index 90323555d8..8c1f2406f7 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java +++ 
b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java @@ -417,4 +417,45 @@ public final class NativeInterpreterWrapperTest { assertThat(shape[1]).isEqualTo(3); assertThat(shape[2]).isEqualTo(1); } + + @Test + public void testGetInferenceLatency() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + Tensor[] outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + assertThat(wrapper.getLastNativeInferenceDurationNanoseconds()).isGreaterThan(0L); + wrapper.close(); + } + + @Test + public void testGetInferenceLatencyWithNewWrapper() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + assertThat(wrapper.getLastNativeInferenceDurationNanoseconds()).isNull(); + wrapper.close(); + } + + @Test + public void testGetLatencyAfterFailedInference() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + try { + wrapper.run(inputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains("0-th input dimension should be [?,8,8,3], but found [?,8,7,3]"); + } + assertThat(wrapper.getLastNativeInferenceDurationNanoseconds()).isNull(); + wrapper.close(); + } } diff --git a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java index 8660cabf70..a5c13053d7 100644 --- 
a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java +++ b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java @@ -32,4 +32,19 @@ public class TestHelper { throw new IllegalArgumentException("Interpreter has not initialized; Failed to setUseNNAPI."); } } + + /** + * Gets the last inference duration in nanoseconds. It returns null if there is no previous + * inference run or the last inference run failed. + * + * @param interpreter an instance of {@code Interpreter}. If it is not initialized, an {@code + * IllegalArgumentException} will be thrown. + */ + public static Long getLastNativeInferenceDurationNanoseconds(Interpreter interpreter) { + if (interpreter != null && interpreter.wrapper != null) { + return interpreter.wrapper.getLastNativeInferenceDurationNanoseconds(); + } else { + throw new IllegalArgumentException("Interpreter has not initialized; Failed to get latency."); + } + } } -- GitLab From e101ce9c1c8399fecd6679293d8cb2065ce8d47f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 27 Feb 2018 14:55:13 -0800 Subject: [PATCH 129/311] Properly handle inlining failures PiperOrigin-RevId: 187237044 --- .../core/grappler/optimizers/function_optimizer.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 3c96ff869b..ba8a76ad5f 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -27,12 +27,15 @@ namespace tensorflow { namespace grappler { Status InlineFunction(const NodeDef& node, const FunctionDef& func, - GraphDef* graph) { + const FunctionDefLibrary& library, GraphDef* graph) { const std::unordered_map attr(node.attr().begin(), node.attr().end()); - FunctionDefLibrary library; std::unique_ptr item = GrapplerItemFromFunctionDef(func, attr, library); + if 
(!item) { + return errors::InvalidArgument("Failed to inline function ", node.op(), + " instantiated by ", node.name()); + } std::unordered_map input_nodes; for (int i = 0; i < func.signature().input_arg_size(); ++i) { @@ -129,7 +132,8 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, if (it == functions.end()) { *optimized_graph->add_node() = node; } else { - TF_RETURN_IF_ERROR(InlineFunction(node, *it->second, optimized_graph)); + TF_RETURN_IF_ERROR(InlineFunction(node, *it->second, item.graph.library(), + optimized_graph)); } } -- GitLab From 2c25f08b6f97155bd5ce95aada5a3cc9b916176f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 15:19:47 -0800 Subject: [PATCH 130/311] Implement support for unpartitioning tf.nn.embedding_lookup into a single gather. PiperOrigin-RevId: 187241089 --- tensorflow/contrib/lite/toco/BUILD | 1 + .../graph_transformations.h | 1 + .../propagate_fixed_sizes.cc | 6 + .../remove_trivial_passthrough.cc | 4 +- .../unpartition_embedding_lookup.cc | 237 ++++++++++++++++++ .../contrib/lite/toco/import_tensorflow.cc | 41 +++ tensorflow/contrib/lite/toco/model.h | 26 ++ tensorflow/contrib/lite/toco/toco_tooling.cc | 1 + tensorflow/contrib/lite/toco/tooling_util.cc | 11 + tensorflow/contrib/lite/toco/tooling_util.h | 6 +- 10 files changed, 331 insertions(+), 3 deletions(-) create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 17407f3db2..845bc0460f 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -240,6 +240,7 @@ cc_library( "graph_transformations/resolve_tensorflow_tile.cc", "graph_transformations/resolve_transpose_attributes.cc", "graph_transformations/unfuse_activation_functions.cc", + "graph_transformations/unpartition_embedding_lookup.cc", "graph_transformations/unroll_batch_matmul.cc", ], hdrs = [ diff 
--git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index f2c81ebc81..f0739990ad 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -177,6 +177,7 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStridedSlice) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFill) DECLARE_GRAPH_TRANSFORMATION(ResolveMultiplyByZero) DECLARE_GRAPH_TRANSFORMATION(Dequantize) +DECLARE_GRAPH_TRANSFORMATION(UnpartitionEmbeddingLookup) class ResolveReshapeAttributes : public GraphTransformation { public: diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 0e2e5ecf30..fc26f997a6 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -1542,6 +1542,12 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { case OperatorType::kTranspose: ProcessTransposeOperator(model, static_cast(op)); break; + case OperatorType::kDynamicPartition: + case OperatorType::kDynamicStitch: + // DynamicPartition/DynamicStitch are currently only supported for + // transforms that remove them, so we avoid propagating shapes through + // them and let things settle once they've been removed. + break; default: // Unimplemented, another graph transformation should drop it. 
LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(op->type); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc index 587f171bbf..aa93ace03a 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc @@ -60,7 +60,9 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, for (int i = 0; i < passthru_op->inputs.size(); i++) { if (!model->GetArray(passthru_op->inputs[i]).buffer) { count_nonconstant_input_arrays++; - main_input_array_index = i; + if (count_nonconstant_input_arrays == 1) { + main_input_array_index = i; + } } } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc new file mode 100644 index 0000000000..419fb9a799 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc @@ -0,0 +1,237 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { + // Collapses a partitioned tf.nn.embedding_lookup back into a single Gather. + // https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup + // This transform attempts to identify the len(params) > 1 case and collapse + // it to the len(params) = 1 case by concatenating the original params and + // reversing the partitioning. + // + // If len(params) to the tf.nn.embedding_lookup == 1, the whole op becomes + // simply a gather: + // https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/python/ops/embedding_ops.py#L150 + // + // Notes on this implementation: + // - only supports partition_strategy='mod' + // + // A rough graph of a partitioned embedding_lookup looks like: + // (ids)--+-->FloorDiv--+-->DynamicPartition-->[[Gather]]--\ + // \-->FloorMod--/ | + // V | + // Range-->DynamicPartition-------->DynamicStitch<---------/ + // (const) V + // (embeddings) + + // First look for the final DynamicStitch. + auto op_it = model->operators.begin() + op_index; + if (op_it->get()->type != OperatorType::kDynamicStitch) { + return false; + } + auto* stitch_op = static_cast(op_it->get()); + + // Split up the DynamicStitch inputs into the indices and data. + std::vector stitch_indices_inputs; + std::vector stitch_data_inputs; + for (size_t i = 0; i < stitch_op->num_partitions; ++i) { + stitch_indices_inputs.push_back(stitch_op->inputs[i]); + } + for (size_t i = stitch_op->num_partitions; i < stitch_op->num_partitions * 2; + ++i) { + stitch_data_inputs.push_back(stitch_op->inputs[i]); + } + + // Validate all indices come from the same DynamicPartition. 
+ DynamicPartitionOperator* indices_partition_op = nullptr; + for (const string& indices_partition_output_name : stitch_indices_inputs) { + auto* op = GetOpWithOutput(*model, indices_partition_output_name); + CHECK(op) << "Source of " << indices_partition_output_name << " not found"; + if (op->type != OperatorType::kDynamicPartition) { + AddMessageF( + "Skipping because indices input %s into " + "%s is unexpected", + LogName(*op), LogName(*stitch_op)); + return false; + } + if (!indices_partition_op) { + indices_partition_op = static_cast(op); + } else { + // Ensure this is the same op as previous ones. + if (op != indices_partition_op) { + AddMessageF( + "Skipping because indices input %s into " + "%s is from a different source op than others", + LogName(*op), LogName(*stitch_op)); + return false; + } + } + } + CHECK(indices_partition_op) << "No indices inputs"; + + // The data for the indices must be a constant range of the array shape. + if (!IsConstantParameterArray(*model, indices_partition_op->inputs[0])) { + AddMessageF("Skipping because indices partition data is non-constant"); + return false; + } + auto& indices_data_array = model->GetArray(indices_partition_op->inputs[0]); + if (indices_data_array.data_type == ArrayDataType::kNone) { + // Yield until data types are propagated. + return false; + } + CHECK(indices_data_array.data_type == ArrayDataType::kInt32) + << "Indices partition inputs must be int32"; + const auto& indices_data_buffer = + indices_data_array.GetBuffer().data; + for (size_t i = 0; i < indices_data_buffer.size(); ++i) { + CHECK_EQ(indices_data_buffer[i], i) << "Indices range must be identity"; + } + + // Find all of the gathers used for the data inputs. 
+ std::vector gather_ops; + for (const string& gather_output_name : stitch_data_inputs) { + auto* op = GetOpWithOutput(*model, gather_output_name); + CHECK(op) << "Source of " << gather_output_name << " not found"; + if (op->type != OperatorType::kGather) { + AddMessageF( + "Skipping because data input %s into %s " + "is unexpected", + LogName(*op), LogName(*stitch_op)); + return false; + } + gather_ops.push_back(static_cast(op)); + } + + // Validate all gathers come from the same DynamicPartition. + DynamicPartitionOperator* data_partition_op = nullptr; + for (auto* gather_op : gather_ops) { + auto* op = GetOpWithOutput(*model, gather_op->inputs[1]); + CHECK(op) << "Source of " << gather_op->inputs[1] << " not found"; + if (op->type != OperatorType::kDynamicPartition) { + AddMessageF( + "Skipping because data input %s into " + "%s is unexpected", + LogName(*op), LogName(*gather_op)); + return false; + } + if (!data_partition_op) { + data_partition_op = static_cast(op); + } else { + // Ensure this is the same op as previous ones. + if (op != data_partition_op) { + AddMessageF( + "Skipping because data input %s into " + "%s is from a different source op than others", + LogName(*op), LogName(*gather_op)); + return false; + } + } + } + CHECK(data_partition_op) << "No data inputs"; + + // Validate the partition ops have the same sizes. + CHECK_EQ(indices_partition_op->num_partitions, + data_partition_op->num_partitions) + << "Indices and data partition ops have differing dimensions"; + int num_partitions = indices_partition_op->num_partitions; + + // Partition strategy of 'mod' gives us a FloorMod and FloorDiv. + // The gather partition uses the FloorDiv as the data and FloorMod as the + // partitions and the indices use the FloorMod as their partitions. 
+ Operator* div_op = GetOpWithOutput(*model, data_partition_op->inputs[0]); + Operator* mod_op = GetOpWithOutput(*model, data_partition_op->inputs[1]); + CHECK(div_op && div_op->type == OperatorType::kFloorDiv) + << "Unsupported partition strategy"; + CHECK(mod_op && mod_op->type == OperatorType::kFloorMod) + << "Unsupported partition strategy"; + CHECK_EQ(mod_op, GetOpWithOutput(*model, indices_partition_op->inputs[1])) + << "Indices and data parition ops require the same partition strategy " + "and inputs"; + + // Glob together all of the gather data. This is not yet in the correct order. + auto* gather_params_concat_op = new ConcatenationOperator; + for (const auto& gather_op : gather_ops) { + gather_params_concat_op->inputs.push_back(gather_op->inputs[0]); + } + gather_params_concat_op->outputs.push_back( + AvailableArrayName(*model, gather_ops[0]->inputs[0] + "_unpartitioned")); + op_it = model->operators.emplace(op_it, gather_params_concat_op) + 1; + model->GetOrCreateArray(gather_params_concat_op->outputs[0]); + + // Permute the gather params to undo the partitioning that was originally + // done. 
+ auto* gather_params_permute_op = new GatherOperator; + gather_params_permute_op->inputs.push_back( + gather_params_concat_op->outputs[0]); + gather_params_permute_op->inputs.push_back( + AvailableArrayName(*model, gather_ops[0]->inputs[0] + "_permuted/perm")); + gather_params_permute_op->outputs.push_back( + AvailableArrayName(*model, gather_ops[0]->inputs[0] + "_permuted")); + op_it = model->operators.emplace(op_it, gather_params_permute_op) + 1; + model->GetOrCreateArray(gather_params_permute_op->outputs[0]); + const auto& partition_array = model->GetArray(gather_ops[0]->inputs[0]); + const auto& partition_array_dims = partition_array.shape().dims(); + auto& perm_array = + model->GetOrCreateArray(gather_params_permute_op->inputs[1]); + perm_array.data_type = ArrayDataType::kInt32; + perm_array.mutable_shape()->ReplaceDims( + {num_partitions * partition_array_dims[0]}); + auto& perm_data = perm_array.GetMutableBuffer().data; + perm_data.resize(RequiredBufferSizeForShape(perm_array.shape())); + // NOTE: this is what relies on the partition_strategy. + for (int i = 0; i < num_partitions * partition_array_dims[0]; ++i) { + int p = i % num_partitions; + perm_data[i] = p * partition_array_dims[0] + i / num_partitions; + } + + // Insert the new unpartitioned gather op. + auto* merged_gather_op = new GatherOperator; + merged_gather_op->inputs = {gather_params_permute_op->outputs[0], + mod_op->inputs[0]}; + merged_gather_op->outputs = {stitch_op->outputs[0]}; + model->operators.emplace(op_it, merged_gather_op); + + AddMessageF( + "Replacing suspected partitioned tf.nn.embedding_lookup (starting at %s " + "+ %s and ending at %s) with a single unpartitioned gather %s", + LogName(*div_op), LogName(*mod_op), LogName(*stitch_op), + LogName(*merged_gather_op)); + + // Ensure the stitch output array is dead, as we don't want whatever was in it + // previously now that we've redefined it. It'll be recreated when needed. 
+ model->EraseArray(stitch_op->outputs[0]); + model->GetOrCreateArray(merged_gather_op->outputs[0]); + + // Erase all the original ops. + DeleteOpAndArraysIfUnused(model, div_op); + DeleteOpAndArraysIfUnused(model, mod_op); + for (auto* gather_op : gather_ops) { + DeleteOpAndArraysIfUnused(model, gather_op); + } + DeleteOpAndArraysIfUnused(model, indices_partition_op); + DeleteOpAndArraysIfUnused(model, data_partition_op); + DeleteOpAndArraysIfUnused(model, stitch_op); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 52a0512e23..41abca864d 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1896,6 +1896,42 @@ void ConvertTopKV2Operator(const NodeDef& node, op->outputs.push_back(node.name() + ":1"); model->operators.emplace_back(op.release()); } + +void ConvertDynamicPartitionOperator( + const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, + Model* model) { + auto op = absl::make_unique(); + CHECK(HasAttr(node, "num_partitions")); + op->num_partitions = GetIntAttr(node, "num_partitions"); + CheckInputsCount(node, tf_import_flags, 2); + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + CHECK_GT(op->num_partitions, 1); + op->outputs.push_back(node.name()); // Implicit :0. + for (int i = 1; i < op->num_partitions; ++i) { + op->outputs.push_back(node.name() + ":" + std::to_string(i)); + } + model->operators.emplace_back(op.release()); +} + +void ConvertDynamicStitchOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { + // The parallel and non-parallel variants are the same besides whether they + // have a parallel loop; there are no behavioral differences. 
+ CHECK(node.op() == "DynamicStitch" || node.op() == "ParallelDynamicStitch"); + auto op = absl::make_unique(); + CHECK(HasAttr(node, "N")); + op->num_partitions = GetIntAttr(node, "N"); + // Expect all ID partitions + all value partitions. + CheckInputsCount(node, tf_import_flags, op->num_partitions * 2); + for (int i = 0; i < op->num_partitions * 2; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op.release()); +} + } // namespace std::unique_ptr ImportTensorFlowGraphDef( @@ -2081,6 +2117,11 @@ std::unique_ptr ImportTensorFlowGraphDef( ConvertExpOperator(node, tf_import_flags, model); } else if (node.op() == "TopK" || node.op() == "TopKV2") { ConvertTopKV2Operator(node, tf_import_flags, model); + } else if (node.op() == "DynamicPartition") { + ConvertDynamicPartitionOperator(node, tf_import_flags, model); + } else if (node.op() == "DynamicStitch" || + node.op() == "ParallelDynamicStitch") { + ConvertDynamicStitchOperator(node, tf_import_flags, model); } else { ConvertUnsupportedOperator(node, tf_import_flags, model); } diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index d5df0fb951..ed0dedc003 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -115,6 +115,8 @@ enum class OperatorType { kTensorFlowTile, kTranspose, kTopK_V2, + kDynamicPartition, + kDynamicStitch, // An unsupported TF operation. It's only needed to be able to represent TF // graph internally and is expected to be dropped by graph transformations. kTensorFlowUnsupported, @@ -1414,6 +1416,30 @@ struct TopKV2Operator : Operator { TopKV2Operator() : Operator(OperatorType::kTopK_V2) {} }; +// DynamicPartition operator: +// +// Inputs: +// inputs[0]: required: data. +// inputs[1]: required: partitions. 
+// +// TensorFlow equivalent: DynamicPartition +struct DynamicPartitionOperator : Operator { + DynamicPartitionOperator() : Operator(OperatorType::kDynamicPartition) {} + int num_partitions; +}; + +// DynamicStitch operator: +// +// Inputs: +// inputs[0,N): required: indices. +// inputs[N,2N): required: data. +// +// TensorFlow equivalent: DynamicStitch/ParallelDynamicStitch +struct DynamicStitchOperator : Operator { + DynamicStitchOperator() : Operator(OperatorType::kDynamicStitch) {} + int num_partitions; +}; + // Alloc's are used for transient arrays only. An Alloc specifies which interval // of the "transient_data" workspace buffer passed to inference functions, is to // be used for the transient array at hand. The 'start' and 'end' values are diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index a09a3c4ef5..42e0a89017 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -102,6 +102,7 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new ResolveConstantShapeOrRank); transformations->Add(new MakeInitialDequantizeOperator); transformations->Add(new ResolveConstantFakeQuant); + transformations->Add(new UnpartitionEmbeddingLookup); } bool SupportsQuantization(FileFormat format) { diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index d23b3737fc..f92e10752d 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -159,6 +159,15 @@ bool DeleteArrayIfUsedOnce(const string& array_name, Model* model) { return false; } +void DeleteOpAndArraysIfUnused(Model* model, Operator* op) { + for (const string& array_name : op->inputs) { + DeleteArrayIfUsedOnce(array_name, model); + } + auto op_it = FindOp(*model, op); + CHECK(op_it != model->operators.end()); + model->operators.erase(op_it); +} + std::vector>::const_iterator FindOpWithOutput( 
const Model& model, const string& array_name) { for (auto it = model.operators.begin(); it != model.operators.end(); ++it) { @@ -347,6 +356,8 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(TopK_V2) HANDLE_OPERATORTYPENAME_CASE(TensorFlowUnsupported) HANDLE_OPERATORTYPENAME_CASE(Exp) + HANDLE_OPERATORTYPENAME_CASE(DynamicPartition) + HANDLE_OPERATORTYPENAME_CASE(DynamicStitch) default: LOG(FATAL) << "Unhandled op type"; #undef HANDLE_OPERATORTYPENAME_CASE diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index 11208ed667..01917b29de 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -64,6 +64,10 @@ int CountOpsWithInput(const Model& model, const string& array_name); bool DeleteArrayIfUnused(const string& array_name, Model* model); bool DeleteArrayIfUsedOnce(const string& array_name, Model* model); +// Deletes the op and any of its input and output arrays if they are unused +// after the op has been deleted. +void DeleteOpAndArraysIfUnused(Model* model, Operator* op); + std::vector>::const_iterator FindOpWithOutput( const Model& model, const string& array_name); Operator* GetOpWithOutput(const Model& model, const string& array_name); @@ -71,8 +75,6 @@ Operator* GetOpWithOutput(const Model& model, const string& array_name); std::vector>::iterator FindOpWithOutput( Model& model, const string& array_name); -Operator* GetOpWithOutput(const Model& model, const string& array_name); - std::vector>::const_iterator FindOpWithInput( const Model& model, const string& array_name); -- GitLab From 53b2181ea5cff054d40c583f05da942a9a56a283 Mon Sep 17 00:00:00 2001 From: Jeremy Lau Date: Tue, 27 Feb 2018 15:32:16 -0800 Subject: [PATCH 131/311] Make RecentRequestIds more efficient. 
PiperOrigin-RevId: 187242940 --- tensorflow/core/distributed_runtime/BUILD | 1 + .../core/distributed_runtime/recent_request_ids.cc | 9 ++++++--- .../core/distributed_runtime/recent_request_ids.h | 6 ++++-- .../distributed_runtime/recent_request_ids_test.cc | 13 +++++++++++++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD index 9e152aa082..434626bd2d 100644 --- a/tensorflow/core/distributed_runtime/BUILD +++ b/tensorflow/core/distributed_runtime/BUILD @@ -595,6 +595,7 @@ tf_cc_test( srcs = ["recent_request_ids_test.cc"], deps = [ ":recent_request_ids", + ":request_id", "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", diff --git a/tensorflow/core/distributed_runtime/recent_request_ids.cc b/tensorflow/core/distributed_runtime/recent_request_ids.cc index c30879406c..4f6866c5d1 100644 --- a/tensorflow/core/distributed_runtime/recent_request_ids.cc +++ b/tensorflow/core/distributed_runtime/recent_request_ids.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/recent_request_ids.h" +#include + #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" @@ -29,12 +31,14 @@ RecentRequestIds::RecentRequestIds(int num_tracked_request_ids) Status RecentRequestIds::TrackUnique(int64 request_id, const string& method_name, const protobuf::Message& request) { - mutex_lock l(mu_); if (request_id == 0) { // For backwards compatibility, allow all requests with request_id 0. return Status::OK(); } - if (set_.count(request_id) > 0) { + + mutex_lock l(mu_); + const bool inserted = set_.insert(request_id).second; + if (!inserted) { // Note: RecentRequestIds is not strict LRU because we don't update // request_id's age in the circular_buffer_ if it's tracked again. 
Strict // LRU is not useful here because returning this error will close the @@ -49,7 +53,6 @@ Status RecentRequestIds::TrackUnique(int64 request_id, // when the buffer is not yet full. set_.erase(circular_buffer_[next_index_]); circular_buffer_[next_index_] = request_id; - set_.insert(request_id); next_index_ = (next_index_ + 1) % circular_buffer_.size(); return Status::OK(); } diff --git a/tensorflow/core/distributed_runtime/recent_request_ids.h b/tensorflow/core/distributed_runtime/recent_request_ids.h index e8e45331dd..11cf937c94 100644 --- a/tensorflow/core/distributed_runtime/recent_request_ids.h +++ b/tensorflow/core/distributed_runtime/recent_request_ids.h @@ -16,11 +16,13 @@ limitations under the License. #ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RECENT_REQUEST_IDS_H_ #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RECENT_REQUEST_IDS_H_ +#include +#include #include #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/worker.pb.h" @@ -64,7 +66,7 @@ class RecentRequestIds { // request_id. int next_index_ GUARDED_BY(mu_) = 0; std::vector circular_buffer_ GUARDED_BY(mu_); - gtl::FlatSet set_ GUARDED_BY(mu_); + std::unordered_set set_ GUARDED_BY(mu_); }; } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/recent_request_ids_test.cc b/tensorflow/core/distributed_runtime/recent_request_ids_test.cc index 9a0facf540..8910a50e9c 100644 --- a/tensorflow/core/distributed_runtime/recent_request_ids_test.cc +++ b/tensorflow/core/distributed_runtime/recent_request_ids_test.cc @@ -17,8 +17,10 @@ limitations under the License. 
#include +#include "tensorflow/core/distributed_runtime/request_id.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/worker.pb.h" @@ -93,4 +95,15 @@ TEST(RecentRequestIds, Ordered3) { TestOrdered(3); } TEST(RecentRequestIds, Ordered4) { TestOrdered(4); } TEST(RecentRequestIds, Ordered5) { TestOrdered(5); } +void BM_TrackUnique(int iters) { + RecentRequestIds recent_request_ids(100000); + RecvTensorRequest request; + for (int i = 0; i < iters; ++i) { + TF_CHECK_OK(recent_request_ids.TrackUnique(GetUniqueRequestId(), + "BM_TrackUnique", request)); + } +} + +BENCHMARK(BM_TrackUnique); + } // namespace tensorflow -- GitLab From c54a6ce4b53172569caa19991ec36be04121a359 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 15:39:58 -0800 Subject: [PATCH 132/311] tf.contrib.data.bucket_by_sequence_length for variable length inputs PiperOrigin-RevId: 187244061 --- tensorflow/contrib/data/__init__.py | 2 + .../python/kernel_tests/bucketing_test.py | 90 ++++++++++++++ .../contrib/data/python/ops/grouping.py | 115 ++++++++++++++++++ 3 files changed, 207 insertions(+) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index fcdccdd26c..1777727de8 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -25,6 +25,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. 
@@Counter @@batch_and_drop_remainder +@@bucket_by_sequence_length @@dense_to_sparse_batch @@enumerate_dataset @@group_by_window @@ -58,6 +59,7 @@ from tensorflow.contrib.data.python.ops.counter import Counter from tensorflow.contrib.data.python.ops.enumerate_ops import enumerate_dataset from tensorflow.contrib.data.python.ops.error_ops import ignore_errors from tensorflow.contrib.data.python.ops.get_single_element import get_single_element +from tensorflow.contrib.data.python.ops.grouping import bucket_by_sequence_length from tensorflow.contrib.data.python.ops.grouping import group_by_window from tensorflow.contrib.data.python.ops.interleave_ops import parallel_interleave from tensorflow.contrib.data.python.ops.interleave_ops import sloppy_interleave diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py index f1b494e1a6..94f800e8a5 100644 --- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py @@ -17,6 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import random + import numpy as np from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base @@ -379,5 +381,93 @@ class BucketTest(test.TestCase): self.assertEqual(batches, 15) +class BucketBySequenceLength(test.TestCase): + + def testBucket(self): + + boundaries = [10, 20, 30] + batch_sizes = [10, 8, 4, 2] + lengths = [8, 13, 25, 35] + + def element_gen(): + # Produce 1 batch for each bucket + elements = [] + for batch_size, length in zip(batch_sizes, lengths): + for _ in range(batch_size): + elements.append([1] * length) + random.shuffle(elements) + for el in elements: + yield (el,) + + element_len = lambda el: array_ops.shape(el)[0] + dataset = dataset_ops.Dataset.from_generator( + element_gen, (dtypes.int64,), ([None],)).apply( + 
grouping.bucket_by_sequence_length( + element_len, boundaries, batch_sizes)) + batch, = dataset.make_one_shot_iterator().get_next() + + with self.test_session() as sess: + batches = [] + for _ in range(4): + batches.append(sess.run(batch)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(batch) + batch_sizes_val = [] + lengths_val = [] + for batch in batches: + batch_size = batch.shape[0] + length = batch.shape[1] + batch_sizes_val.append(batch_size) + lengths_val.append(length) + self.assertEqual(sum(batch_sizes_val), sum(batch_sizes)) + self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val)) + self.assertEqual(sorted(lengths), sorted(lengths_val)) + + def testPadToBoundary(self): + + boundaries = [10, 20, 30] + batch_sizes = [10, 8, 4, 2] + lengths = [8, 13, 25] + + def element_gen(): + # Produce 1 batch for each bucket + elements = [] + for batch_size, length in zip(batch_sizes[:-1], lengths): + for _ in range(batch_size): + elements.append([1] * length) + random.shuffle(elements) + for el in elements: + yield (el,) + for _ in range(batch_sizes[-1]): + el = [1] * (boundaries[-1] + 5) + yield (el,) + + element_len = lambda el: array_ops.shape(el)[0] + dataset = dataset_ops.Dataset.from_generator( + element_gen, (dtypes.int64,), ([None],)).apply( + grouping.bucket_by_sequence_length( + element_len, boundaries, batch_sizes, + pad_to_bucket_boundary=True)) + batch, = dataset.make_one_shot_iterator().get_next() + + with self.test_session() as sess: + batches = [] + for _ in range(3): + batches.append(sess.run(batch)) + with self.assertRaisesOpError("bucket_boundaries"): + sess.run(batch) + batch_sizes_val = [] + lengths_val = [] + for batch in batches: + batch_size = batch.shape[0] + length = batch.shape[1] + batch_sizes_val.append(batch_size) + lengths_val.append(length) + batch_sizes = batch_sizes[:-1] + self.assertEqual(sum(batch_sizes_val), sum(batch_sizes)) + self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val)) + 
self.assertEqual(sorted(boundaries), sorted(lengths_val)) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 67b085002a..a19be22254 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -17,13 +17,20 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import math_ops def group_by_window(key_func, @@ -85,6 +92,114 @@ def group_by_window(key_func, return _apply_fn +def bucket_by_sequence_length(element_length_func, + bucket_boundaries, + bucket_batch_sizes, + padded_shapes=None, + padding_values=None, + pad_to_bucket_boundary=False): + """A transformation that buckets elements in a `Dataset` by length. + + Elements of the `Dataset` are grouped together by length and then are padded + and batched. + + This is useful for sequence tasks in which the elements have variable length. + Grouping together elements that have similar lengths reduces the total + fraction of padding in a batch which increases training step efficiency. + + Args: + element_length_func: function from element in `Dataset` to `tf.int64`, + determines the length of the element, which will determine the bucket it + goes into. + bucket_boundaries: `list`, upper length boundaries of the buckets. 
+ bucket_batch_sizes: `list`, batch size per bucket. Length should be + `len(bucket_boundaries) + 1`. + padded_shapes: Nested structure of `tf.TensorShape` to pass to + @{tf.data.Dataset.padded_batch}. If not provided, will use + `dataset.output_shapes`, which will result in variable length dimensions + being padded out to the maximum length in each batch. + padding_values: Values to pad with, passed to + @{tf.data.Dataset.padded_batch}. Defaults to padding with 0. + pad_to_bucket_boundary: bool, if `False`, will pad dimensions with unknown + size to maximum length in batch. If `True`, will pad dimensions with + unknown size to bucket boundary, and caller must ensure that the source + `Dataset` does not contain any elements with length longer than + `max(bucket_boundaries)`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. + + Raises: + ValueError: if `len(bucket_batch_sizes) != len(bucket_boundaries) + 1`. + """ + with ops.name_scope("bucket_by_seq_length"): + if len(bucket_batch_sizes) != (len(bucket_boundaries) + 1): + raise ValueError( + "len(bucket_batch_sizes) must equal len(bucket_boundaries) + 1") + + batch_sizes = constant_op.constant(bucket_batch_sizes, dtype=dtypes.int64) + + def element_to_bucket_id(element): + """Return int64 id of the length bucket for this element.""" + seq_length = element_length_func(element) + + boundaries = list(bucket_boundaries) + buckets_min = [np.iinfo(np.int32).min] + boundaries + buckets_max = boundaries + [np.iinfo(np.int32).max] + conditions_c = math_ops.logical_and( + math_ops.less_equal(buckets_min, seq_length), + math_ops.less(seq_length, buckets_max)) + bucket_id = math_ops.reduce_min(array_ops.where(conditions_c)) + + return bucket_id + + def window_size_fn(bucket_id): + # The window size is set to the batch size for this bucket + window_size = batch_sizes[bucket_id] + return window_size + + def make_padded_shapes(shapes, none_filler=None): + padded = [] + for 
shape in nest.flatten(shapes): + shape = tensor_shape.TensorShape(shape) + shape = [ + none_filler if d.value is None else d + for d in shape + ] + padded.append(shape) + return nest.pack_sequence_as(shapes, padded) + + def batching_fn(bucket_id, grouped_dataset): + """Batch elements in dataset.""" + batch_size = batch_sizes[bucket_id] + none_filler = None + if pad_to_bucket_boundary: + err_msg = ("When pad_to_bucket_boundary=True, elements must have " + "length <= max(bucket_boundaries).") + check = check_ops.assert_less( + bucket_id, + constant_op.constant(len(bucket_batch_sizes) - 1, + dtype=dtypes.int64), + message=err_msg) + with ops.control_dependencies([check]): + boundaries = constant_op.constant(bucket_boundaries, + dtype=dtypes.int64) + bucket_boundary = boundaries[bucket_id] + none_filler = bucket_boundary + shapes = make_padded_shapes( + padded_shapes or grouped_dataset.output_shapes, + none_filler=none_filler) + return grouped_dataset.padded_batch(batch_size, shapes, padding_values) + + def _apply_fn(dataset): + return dataset.apply( + group_by_window(element_to_bucket_id, batching_fn, + window_size_func=window_size_fn)) + + return _apply_fn + + class _VariantDataset(dataset_ops.Dataset): """A Dataset wrapper for a tf.variant-typed function argument.""" -- GitLab From 64d98b3803e3d53e53f14fadd70fa0332de987a0 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 27 Feb 2018 15:41:18 -0800 Subject: [PATCH 133/311] Bump the version of CUB in cmake build. 
PiperOrigin-RevId: 187244251 --- tensorflow/contrib/cmake/external/cub.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/external/cub.cmake b/tensorflow/contrib/cmake/external/cub.cmake index 8368898955..98a8c7e736 100644 --- a/tensorflow/contrib/cmake/external/cub.cmake +++ b/tensorflow/contrib/cmake/external/cub.cmake @@ -14,8 +14,8 @@ # ============================================================================== include (ExternalProject) -set(cub_URL https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.4.zip) -set(cub_HASH SHA256=20a1a39fd97e5da7f40f5f2e7fd73fd2ea59f9dc4bb8a6c5f228aa543e727e31) +set(cub_URL https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.8.0.zip) +set(cub_HASH SHA256=6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3) set(cub_BUILD ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_ARCHIVE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/cub_archive) -- GitLab From 3ba1f72f8829c566372208062fcea04ab5695dc6 Mon Sep 17 00:00:00 2001 From: vihanjain Date: Tue, 27 Feb 2018 16:05:26 -0800 Subject: [PATCH 134/311] Pull request for fixing warm-starting device placement (#17312) * Update checkpoint_utils.py Fix device allocation bug for warm-starting op * Update checkpoint_utils_test.py Fix test --- tensorflow/python/training/checkpoint_utils.py | 6 +++++- tensorflow/python/training/checkpoint_utils_test.py | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/training/checkpoint_utils.py b/tensorflow/python/training/checkpoint_utils.py index 0af1cdecfa..8384d0ae94 100644 --- a/tensorflow/python/training/checkpoint_utils.py +++ b/tensorflow/python/training/checkpoint_utils.py @@ -289,7 +289,11 @@ def _set_checkpoint_initializer(variable, name: Name of the operation. 
""" base_type = variable.dtype.base_dtype - with ops.colocate_with(variable): + # Do not colocate with variable since RestoreV2 op only runs on CPU and + # colocation will force variable (and other ops that colocate with variable) + # to be on CPU as well. It is okay to place the variable's initializer op on + # CPU since it will only be run once at the start. + with ops.device(variable.device), ops.device("/cpu:0"): restore_op = io_ops.restore_v2( ckpt_file, [tensor_name], [slice_spec], [base_type], name=name)[0] variable._initializer_op = state_ops.assign(variable, restore_op) # pylint:disable=protected-access diff --git a/tensorflow/python/training/checkpoint_utils_test.py b/tensorflow/python/training/checkpoint_utils_test.py index a461b24cbb..f564871315 100644 --- a/tensorflow/python/training/checkpoint_utils_test.py +++ b/tensorflow/python/training/checkpoint_utils_test.py @@ -206,7 +206,9 @@ class CheckpointsTest(test.TestCase): checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"useful_scope/": "useful_scope/"}) - self.assertEqual(my4._initializer_op.op.inputs[1].device, "/job:ps") + # initializer runs on the same task but always on CPU. + self.assertEqual(my4._initializer_op.op.inputs[1].device, + "/job:ps/device:CPU:0") def testInitFromRootCheckpoint(self): checkpoint_dir = self.get_temp_dir() -- GitLab From e7e63d8b2386f2b3ddd234da77c15125516c65b6 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Tue, 27 Feb 2018 16:41:38 -0800 Subject: [PATCH 135/311] [XLA] Remove an unused function with a typo in its name. PiperOrigin-RevId: 187252967 --- tensorflow/compiler/xla/service/hlo_module.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 06d92f94fd..ca94118763 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -187,11 +187,6 @@ class HloModule { // Returns a randomly generated uint64. 
uint64 RandomNew64() const; - // Returns the unique name for a computation in this module. - string GetUniqueCompuationName(const string& prefix) { - return computation_name_uniquer_.GetUniqueName(prefix); - } - // Returns the NameUniquer for uniquing instruction names in this module. NameUniquer& instruction_name_uniquer() { return instruction_name_uniquer_; } -- GitLab From 944423c12057e4a5215fade57c286237dca2b48c Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Tue, 27 Feb 2018 17:02:47 -0800 Subject: [PATCH 136/311] Move security.md into the right place. PiperOrigin-RevId: 187255784 --- tensorflow/SECURITY.md => SECURITY.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tensorflow/SECURITY.md => SECURITY.md (100%) diff --git a/tensorflow/SECURITY.md b/SECURITY.md similarity index 100% rename from tensorflow/SECURITY.md rename to SECURITY.md -- GitLab From 681327cd00822f9e7620cf8d95141a75447132f1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 17:13:19 -0800 Subject: [PATCH 137/311] Changed back to Shard for SplitV to get better performance. PiperOrigin-RevId: 187257148 --- tensorflow/core/kernels/split_v_op.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 16fa890780..51d96a17b3 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -236,8 +236,9 @@ class SplitVOpCPUImpl { }; if (use_parallelism_between_outputs) { // Run in parallel, disabling parallelism in functor. - context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor( - num_split, input_element_count / num_split, range_output_func); + Shard(num_split, + context->device()->tensorflow_cpu_worker_threads()->workers, + num_split, input_element_count / num_split, range_output_func); } else { // Run sequentially, but allow internal parallelism in functor. 
range_output_func(0, num_split); -- GitLab From 6585008f3dc3ca0f9163a0588b09379eab46c78a Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Tue, 27 Feb 2018 17:32:27 -0800 Subject: [PATCH 138/311] Add unit tests for context propagation in ThreadPool and a benchmark for ParallelFor. PiperOrigin-RevId: 187259233 --- tensorflow/core/BUILD | 1 + tensorflow/core/lib/core/threadpool_test.cc | 57 ++++++++++++++++++--- tensorflow/core/platform/default/context.h | 2 + 3 files changed, 53 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 1893967cdd..08832b58da 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -339,6 +339,7 @@ cc_library( "lib/strings/strcat.h", "lib/strings/stringprintf.h", "platform/abi.h", + "platform/context.h", "platform/cpu_feature_guard.h", "platform/cpu_info.h", "platform/dynamic_annotations.h", diff --git a/tensorflow/core/lib/core/threadpool_test.cc b/tensorflow/core/lib/core/threadpool_test.cc index 627ef5a892..320f3ebb83 100644 --- a/tensorflow/core/lib/core/threadpool_test.cc +++ b/tensorflow/core/lib/core/threadpool_test.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include +#include "tensorflow/core/platform/context.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/test.h" @@ -35,6 +36,7 @@ TEST(ThreadPool, Empty) { } TEST(ThreadPool, DoWork) { + Context outer_context(ContextKind::kThread); for (int num_threads = 1; num_threads < kNumThreads; num_threads++) { fprintf(stderr, "Testing with %d threads\n", num_threads); const int kWorkItems = 15; @@ -45,7 +47,9 @@ TEST(ThreadPool, DoWork) { { ThreadPool pool(Env::Default(), "test", num_threads); for (int i = 0; i < kWorkItems; i++) { - pool.Schedule([&work, i]() { + pool.Schedule([&outer_context, &work, i]() { + Context inner_context(ContextKind::kThread); + ASSERT_EQ(outer_context, inner_context); ASSERT_FALSE(work[i]); work[i] = true; }); @@ -58,6 +62,7 @@ TEST(ThreadPool, DoWork) { } TEST(ThreadPool, ParallelFor) { + Context outer_context(ContextKind::kThread); // Make ParallelFor use as many threads as possible. int64 kHugeCost = 1 << 30; for (int num_threads = 1; num_threads < kNumThreads; num_threads++) { @@ -68,12 +73,15 @@ TEST(ThreadPool, ParallelFor) { for (int i = 0; i < kWorkItems; i++) { work[i] = false; } - pool.ParallelFor(kWorkItems, kHugeCost, [&work](int64 begin, int64 end) { - for (int64 i = begin; i < end; ++i) { - ASSERT_FALSE(work[i]); - work[i] = true; - } - }); + pool.ParallelFor(kWorkItems, kHugeCost, + [&outer_context, &work](int64 begin, int64 end) { + Context inner_context(ContextKind::kThread); + ASSERT_EQ(outer_context, inner_context); + for (int64 i = begin; i < end; ++i) { + ASSERT_FALSE(work[i]); + work[i] = true; + } + }); for (int i = 0; i < kWorkItems; i++) { ASSERT_TRUE(work[i]); } @@ -167,5 +175,40 @@ static void BM_Parallel(int iters) { } BENCHMARK(BM_Parallel); +static void BM_ParallelFor(int iters, int total, int cost_per_unit) { + ThreadPool pool(Env::Default(), "test", kNumThreads); + // Decrement count concurrently until 0. 
+ std::atomic_int_fast32_t count(iters); + mutex done_lock; + condition_variable done; + bool done_flag = false; + for (int i = 0; i < iters; ++i) { + pool.ParallelFor( + total, cost_per_unit, + [&count, &done_lock, &done, &done_flag](int64 begin, int64 end) { + for (int64 i = begin; i < end; ++i) { + if (count.fetch_sub(1) == 1) { + mutex_lock l(done_lock); + done_flag = true; + done.notify_all(); + } + } + }); + } + mutex_lock l(done_lock); + if (!done_flag) { + done.wait(l); + } +} +BENCHMARK(BM_ParallelFor) + ->ArgPair(1 << 10, 1) + ->ArgPair(1 << 20, 1) + ->ArgPair(1 << 10, 1 << 10) + ->ArgPair(1 << 20, 1 << 10) + ->ArgPair(1 << 10, 1 << 20) + ->ArgPair(1 << 20, 1 << 20) + ->ArgPair(1 << 10, 1 << 30) + ->ArgPair(1 << 20, 1 << 30); + } // namespace thread } // namespace tensorflow diff --git a/tensorflow/core/platform/default/context.h b/tensorflow/core/platform/default/context.h index d8afeb47a9..682f64c26d 100644 --- a/tensorflow/core/platform/default/context.h +++ b/tensorflow/core/platform/default/context.h @@ -22,6 +22,8 @@ class Context { public: Context() {} Context(const ContextKind kind) {} + + bool operator==(const Context& other) const { return true; } }; class WithContext { -- GitLab From 72bbc7f03b6bbd996f5bc4e14c29429612978974 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 18:01:13 -0800 Subject: [PATCH 139/311] Add fields to TfOpStats to store step-related information of some host operations. Also include the starting time of a device step in StepInfoResult. 
PiperOrigin-RevId: 187262025 --- .../contrib/tpu/profiler/tf_op_stats.proto | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto index 2094294baa..e5c798aa2f 100644 --- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto +++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto @@ -77,6 +77,8 @@ message StepInfoResult { // The infeed duration in picoseconds. // Can turn into a map if we want a variable number of ops. optional uint64 infeed_duration_ps = 3; + // The start time of this step in picoseconds. + optional uint64 begin_ps = 4; } // Result proto for a sequence of steps. @@ -155,6 +157,54 @@ message RunEnvironmentResult { repeated HostDependentJobInfoResult host_dependent_job_info = 6; } +// The types of host operations that are tracked. +enum HostOp { + // Invalid host op. + kINVALIDHostOp = 0; + // Each of host op type has two parts: + // (1) the stage where the op happens and (2) the op name. + // stage = Input Data Producer, op = Get Next Batch. + kInputDataProducerGetNextBatch = 1; + // stage = Input Data Producer, op = Session Run. + kInputDataProducerSessionRun = 2; + // stage = Input Data Producer, op = Forward Batch. + kInputDataProducerForwardBatch = 3; + // stage = Infeed Thread, op = Get Next Batch. + kInfeedThreadGetNextBatch = 4; + // stage = Infeed Thread, op = Session Run. + kInfeedThreadSessionRun = 5; + // stage = Infeed Thread, op = Forward Batch. + kInfeedThreadForwardBatch = 6; + // stage = Outfeed Thread, op = Get Next Batch. + kOutfeedThreadGetNextBatch = 7; + // stage = Outfeed Thread, op = Session Run. + kOutfeedThreadSessionRun = 8; + // stage = Outfeed Thread, op = Forward Batch. + kOutfeedThreadForwardBatch = 9; +} + +// Result proto for the host ops per TPU step. +message HostOpsPerTpuStep { + // Whether the data in this message is valid. 
+ optional bool valid = 1 [default = false]; + // The current TPU step number. + optional uint32 tpu_step_num = 2; + // The beginning time of the current TPU step on the device in picoseconds. + optional uint64 tpu_step_begin_ps = 3; + // The ending time of the current TPU step on the device in picoseconds. + optional uint64 tpu_step_end_ps = 4; + // For each possible host operation, maps to the difference between the TPU + // step number that the host op targets and the current TPU step number. + // The key is HostOp, value is the step difference. + map step_diffs = 5; +} + +// Result proto for the host ops for all TPU steps. +message HostOpsResult { + // A sequence of HostOpsPerTpuStep (one for each TPU step) + repeated HostOpsPerTpuStep host_op_sequence = 1; +} + // Result proto for TfStatsHelper. message TfOpStats { // The result for the TF-metric database. @@ -171,4 +221,6 @@ message TfOpStats { optional double matrix_unit_utilization_percent = 6; // The run environment of this profiling session. optional RunEnvironmentResult run_environment = 7; + // The result for the host operations. + optional HostOpsResult host_ops = 8; } -- GitLab From 887c54728f713ec76ea486c94c25dfca791a10c1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 19:09:38 -0800 Subject: [PATCH 140/311] Adopt Eigen::DenseIndex in lieu of int64 for a few variables (to appease compiler warnings/errors). 
PiperOrigin-RevId: 187268113 --- tensorflow/core/kernels/split_op.cc | 8 ++++---- tensorflow/core/kernels/split_v_op.cc | 4 ++-- tensorflow/core/kernels/unpack_op.cc | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc index 1bc92a4f70..7cc3c532c9 100644 --- a/tensorflow/core/kernels/split_op.cc +++ b/tensorflow/core/kernels/split_op.cc @@ -231,10 +231,10 @@ class SplitOpCPU : public SplitOpBase { if (prefix_dim_size == 1) { auto input_reshaped = input.shaped({split_dim_size, suffix_dim_size}); - auto make_sizes = [&](int64 split_size) { + auto make_sizes = [&](Eigen::DenseIndex split_size) { return Eigen::DSizes{split_size, suffix_dim_size}; }; - auto reshape_result = [&](Tensor* result, int64 split_size) { + auto reshape_result = [&](Tensor* result, Eigen::DenseIndex split_size) { return result->shaped({split_size, suffix_dim_size}); }; SplitOpCPUImpl{}( @@ -244,11 +244,11 @@ class SplitOpCPU : public SplitOpBase { } else { auto input_reshaped = input.shaped( {prefix_dim_size, split_dim_size, suffix_dim_size}); - auto make_sizes = [&](int64 split_size) { + auto make_sizes = [&](Eigen::DenseIndex split_size) { return Eigen::DSizes{prefix_dim_size, split_size, suffix_dim_size}; }; - auto reshape_result = [&](Tensor* result, int64 split_size) { + auto reshape_result = [&](Tensor* result, Eigen::DenseIndex split_size) { return result->shaped( {prefix_dim_size, split_size, suffix_dim_size}); }; diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 51d96a17b3..0681ff1198 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -293,7 +293,7 @@ class SplitVOpCPU : public SplitVOpBase { if (prefix_dim_size == 1) { auto input_reshaped = input.shaped({split_dim_size, suffix_dim_size}); - auto make_sizes = [&](Tlen split_size) { + auto make_sizes = [&](Eigen::DenseIndex split_size) { return 
Eigen::DSizes{split_size, suffix_dim_size}; }; auto reshape_result = [&](Tensor* result, Tlen split_size) { @@ -306,7 +306,7 @@ class SplitVOpCPU : public SplitVOpBase { } else { auto input_reshaped = input.shaped( {prefix_dim_size, split_dim_size, suffix_dim_size}); - auto make_sizes = [&](Tlen split_size) { + auto make_sizes = [&](Eigen::DenseIndex split_size) { return Eigen::DSizes{prefix_dim_size, split_size, suffix_dim_size}; }; diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc index 4376df34be..1e1647db5c 100644 --- a/tensorflow/core/kernels/unpack_op.cc +++ b/tensorflow/core/kernels/unpack_op.cc @@ -90,16 +90,16 @@ class UnpackOp : public OpKernel { } #endif // TENSORFLOW_USE_SYCL - int64 before_dim = 1; + Eigen::DenseIndex before_dim = 1; for (int i = 0; i < axis; ++i) { before_dim *= input_shape.dim_size(i); } - int64 after_dim = 1; + Eigen::DenseIndex after_dim = 1; for (int i = axis + 1; i < input_shape.dims(); ++i) { after_dim *= input_shape.dim_size(i); } - const int64 axis_dim = input_shape.dim_size(axis); + const Eigen::DenseIndex axis_dim = input_shape.dim_size(axis); // Except for shape, unpack is a special case of split, so we reuse the // same computational kernels. -- GitLab From f6bda409206dc642d7a6f02842e76b0be7234491 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 27 Feb 2018 19:11:43 -0800 Subject: [PATCH 141/311] [tf.data] Unify behavior for `Dataset.shuffle(..., seed=0)` and `Dataset.shuffle(..., seed=tf.constant(0, dtype=tf.int64))`. Previously, the Python integer argument would give a deterministic seeding, and the tf.Tensor argument would give a non-deterministic seeding when the graph seed was not set. This change fixes the behavior so that both versions give the same deterministic seeding. This change also applies the same fix to `tf.contrib.data.shuffle_and_repeat()` and `RandomDataset`. Fixes #17284. 
PiperOrigin-RevId: 187268252 --- .../contrib/data/python/ops/random_ops.py | 14 +--- .../contrib/data/python/ops/shuffle_ops.py | 14 +--- .../kernel_tests/shuffle_dataset_op_test.py | 27 ++++++ tensorflow/python/data/ops/BUILD | 1 + tensorflow/python/data/ops/dataset_ops.py | 13 +-- tensorflow/python/data/util/BUILD | 24 ++++++ tensorflow/python/data/util/random_seed.py | 58 +++++++++++++ .../python/data/util/random_seed_test.py | 83 +++++++++++++++++++ 8 files changed, 199 insertions(+), 35 deletions(-) create mode 100644 tensorflow/python/data/util/random_seed.py create mode 100644 tensorflow/python/data/util/random_seed_test.py diff --git a/tensorflow/contrib/data/python/ops/random_ops.py b/tensorflow/contrib/data/python/ops/random_ops.py index 7d727165fe..28ef5e50f3 100644 --- a/tensorflow/contrib/data/python/ops/random_ops.py +++ b/tensorflow/contrib/data/python/ops/random_ops.py @@ -19,11 +19,10 @@ from __future__ import print_function from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import random_seed from tensorflow.python.data.util import sparse -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import gen_dataset_ops @@ -34,16 +33,7 @@ class RandomDataset(dataset_ops.Dataset): def __init__(self, seed=None): """A `Dataset` of pseudorandom values.""" super(RandomDataset, self).__init__() - seed, seed2 = random_seed.get_seed(seed) - if seed is None: - self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") - else: - self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") - if seed2 is None: - self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") - else: - self._seed2 = ops.convert_to_tensor( - seed2, 
dtype=dtypes.int64, name="seed2") + self._seed, self._seed2 = random_seed.get_seed(seed) def _as_variant_tensor(self): return gen_dataset_ops.random_dataset( diff --git a/tensorflow/contrib/data/python/ops/shuffle_ops.py b/tensorflow/contrib/data/python/ops/shuffle_ops.py index 99bb79bc06..f35795abd3 100644 --- a/tensorflow/contrib/data/python/ops/shuffle_ops.py +++ b/tensorflow/contrib/data/python/ops/shuffle_ops.py @@ -19,11 +19,11 @@ from __future__ import print_function from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import random_seed from tensorflow.python.data.util import sparse from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.ops import gen_dataset_ops @@ -45,17 +45,7 @@ class _ShuffleAndRepeatDataset(dataset_ops.Dataset): else: self._count = ops.convert_to_tensor( count, dtype=dtypes.int64, name="count") - - seed, seed2 = random_seed.get_seed(seed) - if seed is None: - self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") - else: - self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") - if seed2 is None: - self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") - else: - self._seed2 = ops.convert_to_tensor( - seed2, dtype=dtypes.int64, name="seed2") + self._seed, self._seed2 = random_seed.get_seed(seed) def _as_variant_tensor(self): # pylint: disable=protected-access diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py index c089fb08c1..5fcc48831f 100644 --- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py @@ -132,6 +132,33 @@ class ShuffleDatasetTest(test.TestCase): with 
self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testSeedZero(self): + """Test for same behavior when the seed is a Python or Tensor zero.""" + iterator = ( + dataset_ops.Dataset.range(10).shuffle(10, seed=0) + .make_one_shot_iterator()) + get_next = iterator.get_next() + + elems = [] + with self.test_session() as sess: + for _ in range(10): + elems.append(sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + seed_placeholder = array_ops.placeholder(dtypes.int64, shape=[]) + iterator = ( + dataset_ops.Dataset.range(10).shuffle(10, seed=seed_placeholder) + .make_initializable_iterator()) + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(iterator.initializer, feed_dict={seed_placeholder: 0}) + for elem in elems: + self.assertEqual(elem, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + def testDefaultArguments(self): components = [0, 1, 2, 3, 4] iterator = (dataset_ops.Dataset.from_tensor_slices(components).shuffle(5) diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD index f12b358a7d..dc293562ab 100644 --- a/tensorflow/python/data/ops/BUILD +++ b/tensorflow/python/data/ops/BUILD @@ -23,6 +23,7 @@ py_library( "//tensorflow/python:tensor_util", "//tensorflow/python:util", "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:random_seed", "//tensorflow/python/data/util:sparse", "//third_party/py/numpy", ], diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 3fb1f8d547..5751f35fe1 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -26,13 +26,13 @@ import six from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import random_seed from tensorflow.python.data.util import sparse from tensorflow.python.eager 
import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util @@ -1484,16 +1484,7 @@ class ShuffleDataset(Dataset): self._input_dataset = input_dataset self._buffer_size = ops.convert_to_tensor( buffer_size, dtype=dtypes.int64, name="buffer_size") - seed, seed2 = random_seed.get_seed(seed) - if seed is None: - self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") - else: - self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") - if seed2 is None: - self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") - else: - self._seed2 = ops.convert_to_tensor( - seed2, dtype=dtypes.int64, name="seed2") + self._seed, self._seed2 = random_seed.get_seed(seed) if reshuffle_each_iteration is None: self._reshuffle_each_iteration = True else: diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD index e32c7b54a4..b1bdbdab37 100644 --- a/tensorflow/python/data/util/BUILD +++ b/tensorflow/python/data/util/BUILD @@ -86,6 +86,30 @@ py_test( ], ) +py_library( + name = "random_seed", + srcs = ["random_seed.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework", + ], +) + +py_test( + name = "random_seed_test", + size = "small", + srcs = ["random_seed_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":random_seed", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:util", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git 
a/tensorflow/python/data/util/random_seed.py b/tensorflow/python/data/util/random_seed.py new file mode 100644 index 0000000000..e2c9d8672f --- /dev/null +++ b/tensorflow/python/data/util/random_seed.py @@ -0,0 +1,58 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utilities for generating Tensor-valued random seeds.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +def get_seed(seed): + """Returns the local seeds an operation should use given an op-specific seed. + + See @{tf.get_seed} for more details. This wrapper adds support for the case + where `seed` may be a tensor. + + Args: + seed: An integer or a @{tf.int64} scalar tensor. + + Returns: + A tuple of two @{tf.int64} scalar tensors that should be used for the local + seed of the calling dataset. 
+ """ + seed, seed2 = random_seed.get_seed(seed) + if seed is None: + seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") + else: + seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") + if seed2 is None: + seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") + else: + with ops.name_scope("seed2") as scope: + seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64) + seed2 = array_ops.where( + math_ops.logical_and( + math_ops.equal(seed, 0), math_ops.equal(seed2, 0)), + constant_op.constant(2**31 - 1, dtype=dtypes.int64), + seed2, + name=scope) + return seed, seed2 diff --git a/tensorflow/python/data/util/random_seed_test.py b/tensorflow/python/data/util/random_seed_test.py new file mode 100644 index 0000000000..c3a2dc0537 --- /dev/null +++ b/tensorflow/python/data/util/random_seed_test.py @@ -0,0 +1,83 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for utilities working with arbitrarily nested structures.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.util import random_seed as data_random_seed +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test + + +class RandomSeedTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def testRandomSeed(self): + zero_t = constant_op.constant(0, dtype=dtypes.int64, name='zero') + one_t = constant_op.constant(1, dtype=dtypes.int64, name='one') + intmax_t = constant_op.constant( + 2**31 - 1, dtype=dtypes.int64, name='intmax') + test_cases = [ + # Each test case is a tuple with input to get_seed: + # (input_graph_seed, input_op_seed) + # and output from get_seed: + # (output_graph_seed, output_op_seed) + ((None, None), (0, 0)), + ((None, 1), (random_seed.DEFAULT_GRAPH_SEED, 1)), + ((1, 1), (1, 1)), + ((0, 0), (0, 2**31 - 1)), # Avoid nondeterministic (0, 0) output + ((2**31 - 1, 0), (0, 2**31 - 1)), # Don't wrap to (0, 0) either + ((0, 2**31 - 1), (0, 2**31 - 1)), # Wrapping for the other argument + # Once more, with tensor-valued arguments + ((None, one_t), (random_seed.DEFAULT_GRAPH_SEED, 1)), + ((1, one_t), (1, 1)), + ((0, zero_t), (0, 2**31 - 1)), # Avoid nondeterministic (0, 0) output + ((2**31 - 1, zero_t), (0, 2**31 - 1)), # Don't wrap to (0, 0) either + ((0, intmax_t), (0, 2**31 - 1)), # Wrapping for the other argument + ] + for tc in test_cases: + tinput, toutput = tc[0], tc[1] + random_seed.set_random_seed(tinput[0]) + g_seed, op_seed = data_random_seed.get_seed(tinput[1]) + g_seed 
= self.evaluate(g_seed) + op_seed = self.evaluate(op_seed) + msg = 'test_case = {0}, got {1}, want {2}'.format( + tinput, (g_seed, op_seed), toutput) + self.assertEqual((g_seed, op_seed), toutput, msg=msg) + random_seed.set_random_seed(None) + + if context.in_graph_mode(): + random_seed.set_random_seed(1) + tinput = (1, None) + toutput = (1, ops.get_default_graph()._last_id) # pylint: disable=protected-access + random_seed.set_random_seed(tinput[0]) + g_seed, op_seed = data_random_seed.get_seed(tinput[1]) + g_seed = self.evaluate(g_seed) + op_seed = self.evaluate(op_seed) + msg = 'test_case = {0}, got {1}, want {2}'.format(1, (g_seed, op_seed), + toutput) + self.assertEqual((g_seed, op_seed), toutput, msg=msg) + random_seed.set_random_seed(None) + + +if __name__ == '__main__': + test.main() -- GitLab From 891bf22087c271b26325c3f81e4ef08b6b8af6c1 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 27 Feb 2018 19:31:17 -0800 Subject: [PATCH 142/311] Cleanup post moving record gradient to C - Remove unnecessary tuple build (when not needed) - Stop passing record gradient from python PiperOrigin-RevId: 187269557 --- .../python/eager/python_eager_op_gen.cc | 6 +-- tensorflow/python/eager/pywrap_tfe.h | 9 ++-- tensorflow/python/eager/pywrap_tfe_src.cc | 46 ++++++++--------- tensorflow/python/eager/pywrap_tfe_test.py | 49 +++++++++---------- 4 files changed, 49 insertions(+), 61 deletions(-) diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index e6d03297e0..554e29c7e0 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -712,9 +712,9 @@ bool GenEagerPythonOp::AddEagerFallbackCode( } void GenEagerPythonOp::AddEagerFastPathExecute() { - string fastpath_execute_params = strings::StrCat( - "_ctx._handle, _ctx.device_name, \"", op_def_.name(), "\", ", - "_execute.record_gradient, name, _ctx._post_execution_callbacks"); + string fastpath_execute_params = 
+ strings::StrCat("_ctx._handle, _ctx.device_name, \"", op_def_.name(), + "\", ", "name, _ctx._post_execution_callbacks"); string fallback_params; for (int i = 0; i < api_def_.in_arg_size(); i++) { diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index f9692a8910..b1b4a6b214 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -160,13 +160,10 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, // Item 2: device_name: Name of the device on which to execute the operation, // or NULL for automatic selection. // Item 3: op_name: Name of the TensorFlow op to execute. -// Item 4: record_gradient_callback: Callback that records the gradient of the -// result. The callback takes (op_name, inputs, attrs, result, name) -// - all sequences and records the gradient. -// Item 5: name: An optional name for the operation. -// Item 6: List representing all callbacks to execute after successful +// Item 4: name: An optional name for the operation. +// Item 5: List representing all callbacks to execute after successful // op execute. -// Item 7 onwards: inputs - This is a list of inputs followed by a list of +// Item 6 onwards: inputs - This is a list of inputs followed by a list of // attrs. It is not necessary for type attrs to be present. // // This is named _C since there doesn't seem to be any way to make it visible diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 30e08c8e65..42d97dfe3f 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -31,6 +31,7 @@ limitations under the License. 
#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/python/eager/pywrap_tensor.h" +#include "tensorflow/python/lib/core/safe_ptr.h" using tensorflow::string; using tensorflow::strings::Printf; @@ -1364,7 +1365,7 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, } namespace { -static const int kFastPathExecuteInputStartIndex = 6; +static const int kFastPathExecuteInputStartIndex = 5; PyObject* GetPythonObjectFromString(const char* s) { #if PY_MAJOR_VERSION >= 3 @@ -1621,46 +1622,43 @@ bool RunCallbacks(bool run_gradient_callback, bool run_post_exec_callbacks, const std::vector& flattened_inputs, const std::vector& flattened_attrs, PyObject* flattened_result, PyObject* op_name, PyObject* name, - PyObject* record_gradient_callback, PyObject* callbacks) { - PyObject* inputs = PyTuple_New(flattened_inputs.size()); + PyObject* callbacks) { + tensorflow::Safe_PyObjectPtr inputs = + tensorflow::make_safe(PyTuple_New(flattened_inputs.size())); for (int i = 0; i < flattened_inputs.size(); i++) { PyObject* input = flattened_inputs[i]; Py_INCREF(input); - PyTuple_SET_ITEM(inputs, i, input); + PyTuple_SET_ITEM(inputs.get(), i, input); } int num_non_inferred_attrs = PyTuple_GET_SIZE(args) - op_def->input_arg_size() - kFastPathExecuteInputStartIndex; int num_attrs = flattened_attrs.size() + num_non_inferred_attrs; - PyObject* attrs = PyTuple_New(num_attrs); + tensorflow::Safe_PyObjectPtr attrs = + tensorflow::make_safe(PyTuple_New(num_attrs)); for (int i = 0; i < num_non_inferred_attrs; i++) { auto* attr = PyTuple_GET_ITEM( args, kFastPathExecuteInputStartIndex + op_def->input_arg_size() + i); Py_INCREF(attr); - PyTuple_SET_ITEM(attrs, i, attr); + PyTuple_SET_ITEM(attrs.get(), i, attr); } for (int i = num_non_inferred_attrs; i < num_attrs; i++) { // Not INCREFing anything in flattened_attrs as each of those is a new // reference, so allow the attrs tuple to steal the reference. 
- PyTuple_SET_ITEM(attrs, i, flattened_attrs.at(i - num_non_inferred_attrs)); + PyTuple_SET_ITEM(attrs.get(), i, + flattened_attrs.at(i - num_non_inferred_attrs)); } - PyObject* callback_args = - Py_BuildValue("OOOOO", op_name, inputs, attrs, flattened_result, name); - - auto cleaner = tensorflow::gtl::MakeCleanup([inputs, attrs, callback_args] { - Py_DECREF(inputs); - Py_DECREF(attrs); - Py_DECREF(callback_args); - }); - if (run_gradient_callback) { - RecordGradient(op_name, inputs, attrs, flattened_result, name); + RecordGradient(op_name, inputs.get(), attrs.get(), flattened_result, name); } if (run_post_exec_callbacks) { + tensorflow::Safe_PyObjectPtr callback_args = tensorflow::make_safe( + Py_BuildValue("OOOOO", op_name, inputs.get(), attrs.get(), + flattened_result, name)); for (Py_ssize_t i = 0; i < PyList_Size(callbacks); i++) { PyObject* callback_fn = PyList_GET_ITEM(callbacks, i); if (!PyCallable_Check(callback_fn)) { @@ -1673,7 +1671,7 @@ bool RunCallbacks(bool run_gradient_callback, bool run_post_exec_callbacks, return false; } PyObject* callback_result = - PyObject_CallObject(callback_fn, callback_args); + PyObject_CallObject(callback_fn, callback_args.get()); if (!callback_result) { return false; } @@ -1703,9 +1701,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { PyObject* op_name = PyTuple_GET_ITEM(args, 2); const tensorflow::OpDef* op_def = GetOpDef(op_name); if (op_def == nullptr) return nullptr; - PyObject* record_gradient_callback = PyTuple_GET_ITEM(args, 3); - PyObject* name = PyTuple_GET_ITEM(args, 4); - PyObject* callbacks = PyTuple_GET_ITEM(args, 5); + PyObject* name = PyTuple_GET_ITEM(args, 3); + PyObject* callbacks = PyTuple_GET_ITEM(args, 4); if (args_size < kFastPathExecuteInputStartIndex + op_def->input_arg_size()) { PyErr_SetString( @@ -1775,9 +1772,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { // (similar to benchmark_tf_gradient_function_*). 
Also consider using an // InlinedVector for flattened_attrs and flattened_inputs if the benchmarks // point out problems with heap allocs. - bool run_gradient_callback = !*ThreadTapeIsStopped() && - !GetTapeSet()->empty() && - record_gradient_callback != Py_None; + bool run_gradient_callback = + !*ThreadTapeIsStopped() && !GetTapeSet()->empty(); bool run_post_exec_callbacks = callbacks != Py_None && PyList_Size(callbacks) > 0; bool run_callbacks = run_gradient_callback || run_post_exec_callbacks; @@ -1916,7 +1912,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { if (run_callbacks && !RunCallbacks(run_gradient_callback, run_post_exec_callbacks, op_def, args, *flattened_inputs, *flattened_attrs, flat_result, - op_name, name, record_gradient_callback, callbacks)) { + op_name, name, callbacks)) { return nullptr; } diff --git a/tensorflow/python/eager/pywrap_tfe_test.py b/tensorflow/python/eager/pywrap_tfe_test.py index 49323e6640..418ed75178 100644 --- a/tensorflow/python/eager/pywrap_tfe_test.py +++ b/tensorflow/python/eager/pywrap_tfe_test.py @@ -21,7 +21,6 @@ from __future__ import print_function from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import backprop from tensorflow.python.eager import context -from tensorflow.python.eager import execute from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import test_util @@ -46,15 +45,13 @@ class Tests(test.TestCase): self.assertAllClose( math_ops.matmul(a_2_by_2, b_2_by_2), pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "MatMul", execute.record_gradient, - None, None, a_2_by_2, b_2_by_2, "transpose_a", False, "transpose_b", - False)) + ctx._handle, ctx.device_name, "MatMul", None, None, a_2_by_2, + b_2_by_2, "transpose_a", False, "transpose_b", False)) self.assertAllClose( math_ops.matmul(a_100_by_784, b_100_by_784, transpose_b=True), pywrap_tensorflow.TFE_Py_FastPathExecute( 
- ctx._handle, ctx.device_name, "MatMul", execute.record_gradient, - None, None, a_100_by_784, b_100_by_784, "transpose_a", False, - "transpose_b", True)) + ctx._handle, ctx.device_name, "MatMul", None, None, a_100_by_784, + b_100_by_784, "transpose_a", False, "transpose_b", True)) @test_util.assert_no_new_tensors @test_util.assert_no_garbage_created @@ -64,8 +61,8 @@ class Tests(test.TestCase): a_2_by_2 = constant_op.constant(1.0, shape=[2, 2]) tape.watch(a_2_by_2) z = pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "MatMul", execute.record_gradient, None, - None, a_2_by_2, a_2_by_2, "transpose_a", False, "transpose_b", False) + ctx._handle, ctx.device_name, "MatMul", None, None, a_2_by_2, + a_2_by_2, "transpose_a", False, "transpose_b", False) dz_dy = tape.gradient(z, [a_2_by_2])[0] self.assertAllEqual(dz_dy.numpy(), constant_op.constant(4.0, shape=[2, 2]).numpy()) @@ -80,9 +77,9 @@ class Tests(test.TestCase): self.assertAllClose( math_ops.add_n([a_2_by_2, b_2_by_2]), - pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "AddN", execute.record_gradient, None, - None, [a_2_by_2, b_2_by_2])) + pywrap_tensorflow.TFE_Py_FastPathExecute(ctx._handle, ctx.device_name, + "AddN", None, None, + [a_2_by_2, b_2_by_2])) # Tests homogeneous list op @test_util.assert_no_new_tensors @@ -96,8 +93,8 @@ class Tests(test.TestCase): tape.watch(a_2_by_2) tape.watch(b_2_by_2) z1 = pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "AddN", execute.record_gradient, None, - None, [a_2_by_2, b_2_by_2]) + ctx._handle, ctx.device_name, "AddN", None, None, + [a_2_by_2, b_2_by_2]) z2 = math_ops.add_n([a_2_by_2, b_2_by_2]) dz1_dy = tape.gradient(z1, [a_2_by_2])[0] dz2_dy = tape.gradient(z2, [a_2_by_2])[0] @@ -113,9 +110,9 @@ class Tests(test.TestCase): self.assertAllClose( array_ops.identity_n([a_2_by_2, b_2_by_2]), - pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "IdentityN", 
execute.record_gradient, - None, None, [a_2_by_2, b_2_by_2])) + pywrap_tensorflow.TFE_Py_FastPathExecute(ctx._handle, ctx.device_name, + "IdentityN", None, None, + [a_2_by_2, b_2_by_2])) # Tests heterogeneous list op @test_util.assert_no_new_tensors @@ -129,8 +126,8 @@ class Tests(test.TestCase): tape.watch(a_2_by_2) tape.watch(b_2_by_2) z1 = pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "IdentityN", execute.record_gradient, - None, None, [a_2_by_2, b_2_by_2]) + ctx._handle, ctx.device_name, "IdentityN", None, None, + [a_2_by_2, b_2_by_2]) z2 = array_ops.identity_n([a_2_by_2, b_2_by_2]) dz1_dy = tape.gradient(z1[0], [a_2_by_2])[0] dz2_dy = tape.gradient(z2[0], [a_2_by_2])[0] @@ -147,22 +144,20 @@ class Tests(test.TestCase): # Not enough base params with self.assertRaisesRegexp(ValueError, - "at least 6 items in the input tuple"): + "at least 5 items in the input tuple"): pywrap_tensorflow.TFE_Py_FastPathExecute(ctx_handle, ctx.device_name, "Identity") # Not enough inputs with self.assertRaisesRegexp(ValueError, - "Expected to be at least 7, was 6"): - pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx_handle, ctx_handle, "Identity", backprop._record_gradient, None, - []) + "Expected to be at least 6, was 5"): + pywrap_tensorflow.TFE_Py_FastPathExecute(ctx_handle, ctx_handle, + "Identity", None, []) # Bad type with self.assertRaisesRegexp(TypeError, "expected a string for op_name"): - pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx_handle, ctx.device_name, ctx_handle, backprop._record_gradient, - None, [], a_2_by_2) + pywrap_tensorflow.TFE_Py_FastPathExecute(ctx_handle, ctx.device_name, + ctx_handle, None, [], a_2_by_2) if __name__ == "__main__": -- GitLab From ae4c23db58c6436786bbcdea4a15aa814d642220 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 20:16:16 -0800 Subject: [PATCH 143/311] Improve handling of undefined split_dim_tensor in the split_v op. 
PiperOrigin-RevId: 187272486 --- tensorflow/core/kernels/split_v_op.cc | 7 ++++++- tensorflow/python/kernel_tests/split_op_test.py | 14 ++++++++++++++ tensorflow/python/ops/array_ops.py | 4 +++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 0681ff1198..0ce0b552e6 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -55,8 +55,13 @@ class SplitVOpBase : public OpKernel { const Tensor& input = context->input(0); const TensorShape& input_shape = input.shape(); const Tensor& split_tensor = context->input(1); + const Tensor& split_dim_tensor = context->input(2); - const int32 split_dim_orig = context->input(2).flat()(0); + OP_REQUIRES(context, split_dim_tensor.NumElements() == 1, + errors::InvalidArgument("split_dim_tensor must have " + "exactly one element.")); + + const int32 split_dim_orig = split_dim_tensor.flat()(0); const int32 split_dim = split_dim_orig < 0 ? 
split_dim_orig + input.dims() : split_dim_orig; diff --git a/tensorflow/python/kernel_tests/split_op_test.py b/tensorflow/python/kernel_tests/split_op_test.py index 6171793b14..8cfee3eb93 100644 --- a/tensorflow/python/kernel_tests/split_op_test.py +++ b/tensorflow/python/kernel_tests/split_op_test.py @@ -336,6 +336,20 @@ class SplitOpTest(test.TestCase): for s in splits: self.assertEqual(None, s.get_shape().ndims) + def testNonexistentDimTensor(self): + x = array_ops.placeholder(dtypes.int32) + values = np.zeros([5, 30]) + splits = array_ops.placeholder(dtypes.int32) + with self.assertRaisesRegexp(ValueError, "Cannot infer"): + y = array_ops.split(values, splits, axis=x) + + splits = array_ops.placeholder(dtypes.int32, [3]) + y = array_ops.split(values, splits, axis=x) + with self.test_session(use_gpu=True) as sess: + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "must have exactly one element"): + sess.run(y, {x: np.array([], dtype=np.int32), splits: [4, 11, 15]}) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index cdfb955f54..3db3d84475 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1380,7 +1380,9 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"): axis=axis, num_split=num_or_size_splits, value=value, name=name) if num is None: - num = size_splits._shape_tuple()[0] + size_splits_shape = size_splits._shape_tuple() + if size_splits_shape: + num = size_splits_shape[0] if num is None: raise ValueError("Cannot infer num from shape %s" % num_or_size_splits) -- GitLab From c38a16dbcc5de5fa5579a3e48ec12be316a2cb3f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 21:24:24 -0800 Subject: [PATCH 144/311] Adds poisson_regression_head. 
PiperOrigin-RevId: 187277651 --- tensorflow/contrib/estimator/BUILD | 2 + .../estimator/python/estimator/head.py | 61 ++++++++++++++++ .../estimator/python/estimator/head_test.py | 71 +++++++++++++++++++ 3 files changed, 134 insertions(+) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index ddccfce3c0..773c6ab6c7 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -170,6 +170,7 @@ py_library( "//tensorflow/python:lookup_ops", "//tensorflow/python:math_ops", "//tensorflow/python:metrics", + "//tensorflow/python:nn", "//tensorflow/python:sparse_ops", "//tensorflow/python:sparse_tensor", "//tensorflow/python:summary", @@ -192,6 +193,7 @@ py_test( ":head", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:control_flow_ops", diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index a45f6934cc..f95fcc8039 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -31,6 +31,7 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_lib +from tensorflow.python.ops import nn from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.losses import losses from tensorflow.python.saved_model import signature_constants @@ -237,6 +238,66 @@ def regression_head(weight_column=None, name=name) +def poisson_regression_head( + weight_column=None, + label_dimension=1, + loss_reduction=losses.Reduction.SUM, + compute_full_loss=True, + name=None): + """Creates a `_Head` for poisson regression using `tf.nn.log_poisson_loss`. + + The loss is the weighted sum over all input dimensions. 
Namely, if the input + labels have shape `[batch_size, label_dimension]`, the loss is the weighted + sum over both `batch_size` and `label_dimension`. + + The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`. + In many applications, the shape is `[batch_size, label_dimension]`. + + The `labels` shape must match `logits`, namely + `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape + `[D0, D1, ... DN]` is also supported. + + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or + `[D0, D1, ... DN, label_dimension]`. + + This is implemented as a generalized linear model, see + https://en.wikipedia.org/wiki/Generalized_linear_model. + + Args: + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + label_dimension: Number of regression labels per example. This is the size + of the last dimension of the labels `Tensor` (typically, this has shape + `[batch_size, label_dimension]`). + loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to + reduce training loss over batch. Defaults to `SUM`. + compute_full_loss: Whether to include the constant `log(z!)` term in + computing the poisson loss. See `tf.nn.log_poisson_loss` for the full + documentation. + name: name of the head. If provided, summary and metrics keys will be + suffixed by `"/" + name`. Also used as `name_scope` when creating ops. + + Returns: + An instance of `_Head` for poisson regression. + + Raises: + ValueError: If `label_dimension` or `loss_reduction` is invalid. 
+ """ + def _poisson_loss(labels, logits): + return nn.log_poisson_loss( + targets=labels, log_input=logits, compute_full_loss=compute_full_loss) + return head_lib._regression_head_with_mean_squared_error_loss( # pylint:disable=protected-access + weight_column=weight_column, + label_dimension=label_dimension, + loss_reduction=loss_reduction, + loss_fn=_poisson_loss, + inverse_link_fn=math_ops.exp, + name=name) + + def multi_label_head(n_classes, weight_column=None, thresholds=None, diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index 1411635228..76d050cb28 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import string_ops @@ -1106,5 +1107,75 @@ class MultiLabelHead(test.TestCase): expected_metrics=expected_metrics) +class PoissonRegressionHead(test.TestCase): + + def setUp(self): + ops.reset_default_graph() + + def test_train(self): + head = head_lib.poisson_regression_head() + + # Create estimator spec. + logits = np.array([[0], [-1], [1]], dtype=np.float32) + labels = np.array([[1], [2], [3]], dtype=np.int32) + # With x = exp(logits), z = labels. + # loss = -ln(exp(-x) * (x^z) / z!) + # = x - z * ln(x) + ln(z!) + # = exp(logits) - labels * logits - ln(labels!) + # But for ln(z!) and z > 1, the Stirling approximation is used + # ln(z!) 
= z*ln(z) - z + 0.5*ln(2*pi*z) + # loss = [exp(0) - 1 * 0 + ln(1!), + # exp(-1) - 2 * (-1) + 2*ln(2) - 2 + 0.5*ln(2*pi*2), + # exp(1) - 3 * 1 + 3*ln(3) - 3 + 0.5*ln(2*pi*3)] + # = [1.0, 3.020, 1.482] + # sum_loss = 5.502 + expected_loss = 5.502 + atol = 0.001 + expected_train_result = b'my_train_op' + def _train_op_fn(loss): + with ops.control_dependencies((check_ops.assert_near( + math_ops.to_float(expected_loss), math_ops.to_float(loss), + atol=atol, name='assert_loss'),)): + return constant_op.constant(expected_train_result) + + spec = head.create_estimator_spec( + features={'x': np.array(((42.,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + loss, train_result = sess.run([spec.loss, spec.train_op]) + self.assertAlmostEqual(expected_loss, loss, delta=atol) + self.assertEqual(expected_train_result, train_result) + + def test_predict(self): + head = head_lib.poisson_regression_head() + + # Create estimator spec. + logits = np.array([[0], [-1], [1]], dtype=np.float32) + expected_predictions = np.exp(logits) + spec = head.create_estimator_spec( + features={'x': np.array(((42.,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.PREDICT, + logits=logits) + + # Assert spec contains expected tensors. + keys = prediction_keys.PredictionKeys + self.assertItemsEqual( + (keys.PREDICTIONS, keys.LOGITS), spec.predictions.keys()) + self.assertEqual(dtypes.float32, spec.predictions[keys.PREDICTIONS].dtype) + self.assertEqual(dtypes.float32, spec.predictions[keys.LOGITS].dtype) + + # Assert predictions. 
+ with self.test_session(): + _initialize_variables(self, spec.scaffold) + self.assertAllClose( + expected_predictions, spec.predictions[keys.PREDICTIONS].eval()) + self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval()) + + if __name__ == '__main__': test.main() -- GitLab From 503d9b522e28272e032bc45a10e3c0f21398a16e Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Wed, 28 Feb 2018 00:07:55 -0800 Subject: [PATCH 145/311] [XLA:Evaluator] Handle while loop. * Add while loop support to HloEvaluator; * Add a max_loop_iteration argument to the interpreter's constructor to limit the number of loop iterations that will be evaluated (or no bound if -1). Maintain current constant propagation behavior by setting limit to 0 for evaluators used for CP. PiperOrigin-RevId: 187287574 --- .../xla/service/hlo_constant_folding.cc | 5 ++- .../compiler/xla/service/hlo_evaluator.cc | 41 ++++++++++++++++--- .../compiler/xla/service/hlo_evaluator.h | 10 ++++- .../xla/service/while_loop_simplifier.cc | 2 +- tensorflow/compiler/xla/tests/BUILD | 3 ++ tensorflow/compiler/xla/tests/while_test.cc | 4 +- 6 files changed, 55 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding.cc b/tensorflow/compiler/xla/service/hlo_constant_folding.cc index 53450991b6..35ecd4428d 100644 --- a/tensorflow/compiler/xla/service/hlo_constant_folding.cc +++ b/tensorflow/compiler/xla/service/hlo_constant_folding.cc @@ -35,7 +35,10 @@ limitations under the License. namespace xla { StatusOr HloConstantFolding::Run(HloModule* module) { - auto evaluator = MakeUnique(); + // Limit the constant folding to 0 iterations to skip folding loops. This + // retains the behavior from before while loop support in HloEvaluator and may + // be revised. 
+ auto evaluator = MakeUnique(/*max_loop_iterations=*/0); XLA_VLOG_LINES(2, "HloConstantFolding::Run(), before:\n" + module->ToString()); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 8c7459099d..c3a3251b7d 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1372,7 +1372,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { auto result = Literal::CreateFromShape(map->shape()); - HloEvaluator embedded_evaluator; + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); TF_RETURN_IF_ERROR( result->Populate([&](ArraySlice multi_index) { std::vector> arg_literals; @@ -1507,7 +1507,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { } } - HloEvaluator embedded_evaluator; + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); // For each resulting dimension, calculate and assign computed value. TF_RETURN_IF_ERROR( result->Populate([&](ArraySlice multi_index) { @@ -1581,7 +1581,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { int64 rank = ShapeUtil::Rank(operand_literal.shape()); - HloEvaluator embedded_evaluator; + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); DimensionVector source_index(rank); std::fill(source_index.begin(), source_index.end(), 0); @@ -1692,7 +1692,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { DimensionVector window_index(window.dimensions_size()); DimensionVector operand_index(ShapeUtil::Rank(operand_literal.shape())); - HloEvaluator embedded_evaluator; + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); // For each resulting dimension, calculate and assign computed value. 
TF_RETURN_IF_ERROR( result->Populate([&](ArraySlice output_index) { @@ -2069,7 +2069,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { HloEvaluator* parent_; }; // class HloEvaluator::TypedVisitor -HloEvaluator::HloEvaluator() { +HloEvaluator::HloEvaluator(int64 max_loop_iterations) + : max_loop_iterations_(max_loop_iterations) { typed_visitors_[PRED] = MakeUnique>(this); typed_visitors_[U8] = MakeUnique>(this); typed_visitors_[U16] = MakeUnique([](HloInstruction*) { @@ -2511,6 +2512,36 @@ Status HloEvaluator::HandleConditional(HloInstruction* conditional) { return Status::OK(); } +Status HloEvaluator::HandleWhile(HloInstruction* while_hlo) { + HloComputation* cond_comp = while_hlo->while_condition(); + HloComputation* body_comp = while_hlo->while_body(); + // Initialize the loop carried valued with the input to the While instruction. + auto lcv = GetEvaluatedLiteralFor(while_hlo->operand(0)).CloneToUnique(); + bool keep_going = true; + int64 iteration_count = 0; + HloEvaluator cond_evaluator(max_loop_iterations_); + HloEvaluator loop_body_evaluator(max_loop_iterations_); + while (keep_going) { + if (max_loop_iterations_ >= 0 && iteration_count++ > max_loop_iterations_) { + return InvalidArgument("Loop %s exceeded loop iteration limit (%lld).", + while_hlo->name().c_str(), max_loop_iterations_); + } + TF_ASSIGN_OR_RETURN(auto cond_val, cond_evaluator.Evaluate( + *cond_comp, {lcv.get()})); + keep_going = cond_val->GetFirstElement(); + if (keep_going) { + TF_ASSIGN_OR_RETURN(auto body_val, loop_body_evaluator.Evaluate( + *body_comp, {lcv.get()})); + VLOG(3) << "Loop iteration result: " << body_val->ToString(); + lcv = std::move(body_val); + cond_evaluator.ResetVisitStates(); + loop_body_evaluator.ResetVisitStates(); + } + } + evaluated_[while_hlo] = std::move(lcv); + return Status::OK(); +} + Status HloEvaluator::Preprocess(HloInstruction* hlo) { VLOG(2) << "About to visit HLO: " << hlo->ToString(); return Status::OK(); diff --git 
a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index fc82011630..8a27cf9a3a 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -36,7 +36,10 @@ namespace xla { // This class is not thread-safe. class HloEvaluator : public DfsHloVisitorWithDefault { public: - HloEvaluator(); + // Only evaluate up to max_loop_iterations per while-loop execution if + // specified. + explicit HloEvaluator(int64 max_loop_iterations = -1); + // Evaluates an HLO module and an array of pointers to literals. // Returns the evaluated result as a literal if successful. // Precondition: The indices of arg_literals correspond to the parameter @@ -157,6 +160,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCall(HloInstruction* call) override; + Status HandleWhile(HloInstruction* while_hlo) override; + private: // Returns the already-evaluated literal result for the instruction. // A Constant instruction is considered evaluated and its literal will be @@ -194,6 +199,9 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // Must be cleared for each evaluation. std::vector arg_literals_; + // Max loop iterations to execute with no maximum if negative. + int64 max_loop_iterations_; + TF_DISALLOW_COPY_AND_ASSIGN(HloEvaluator); }; diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index 981de9b220..c9d77c9376 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -212,7 +212,7 @@ static optional GetLoopTripCount(HloInstruction* while_op) { // Now that we know the index of the induction variable, we can we can try to // compute how many times the loop executes. Start by computing the induction // variable's initial value. 
- HloEvaluator evaluator; + HloEvaluator evaluator(/*max_loop_iterations=*/0); auto* while_init = while_op->mutable_operand(0); auto* indvar_init = while_init->mutable_operand(*indvar_tuple_idx); StatusOr> indvar_init_result = diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 19b3dfae4e..dc282f2440 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -334,6 +334,9 @@ xla_test( xla_test( name = "while_test", srcs = ["while_test.cc"], + tags = [ + "enable_for_xla_interpreter", + ], deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc index 52157b837c..33d457c70b 100644 --- a/tensorflow/compiler/xla/tests/while_test.cc +++ b/tensorflow/compiler/xla/tests/while_test.cc @@ -910,7 +910,7 @@ XLA_TEST_F(WhileTest, WhileWithDynamicUpdateSlice) { // Per backend the values generated can be different as the different backends // use different random number generators. // TODO(b/32240857): Extend test to verify outputs. -TEST_F(WhileTest, WhileWithPrngScalarResult) { +TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) { auto v6s32 = ShapeUtil::MakeShape(S32, {6}); // Create a computation for the condition: repeat for count iterations. @@ -1166,7 +1166,7 @@ XLA_TEST_F(WhileTest, NestedWhileWithScalarResult) { // while (f(result).get<0>()) { // result = result + 1; // } -TEST_F(WhileTest, WhileWithCallInsideCondition) { +TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) { auto result_shape = ShapeUtil::MakeShape(S32, {}); // Create a computation for the condition: repeat for 5 iterations. -- GitLab From 6ac343bdfc942678d64dcbfc4d4fc90c0df6a4a0 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 28 Feb 2018 03:39:04 -0800 Subject: [PATCH 146/311] [TF:XLA] Fix SplitV implementation to support negative split_dim. Mirror behavior of Split op when a negative split_dim is used. PiperOrigin-RevId: 187304771 --- tensorflow/compiler/tests/binary_ops_test.py | 14 +++++++++++ .../compiler/tf2xla/kernels/split_op.cc | 23 ++++++++----------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 30a6d3a74d..0e4efaed86 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -1045,6 +1045,20 @@ class BinaryOpsTest(XLATestCase): ], equality_test=self.ListsAreClose) + def splitvOp(x, y): # pylint: disable=invalid-name + return array_ops.split(value=y, num_or_size_splits=[2, 3], axis=x) + for axis in [1, -1]: + self._testBinary( + splitvOp, + np.int32(axis), + np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], + dtype=dtype), + expected=[ + np.array([[0, 1], [5, 6]], dtype=dtype), + np.array([[2, 3, 4], [7, 8, 9]], dtype=dtype), + ], + equality_test=self.ListsAreClose) + def testTile(self): for dtype in self.numeric_types: self._testBinary( diff --git a/tensorflow/compiler/tf2xla/kernels/split_op.cc b/tensorflow/compiler/tf2xla/kernels/split_op.cc index 79c435c90a..43c15e7538 100644 --- a/tensorflow/compiler/tf2xla/kernels/split_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/split_op.cc @@ -111,27 +111,24 @@ class SplitVOp : public XlaOpKernel { void Compile(XlaOpKernelContext* ctx) override { const int32 num_split = num_outputs(); + const TensorShape input_shape = ctx->InputShape(0); const TensorShape index_shape = ctx->InputShape(2); - xla::Literal literal_index; - OP_REQUIRES_OK(ctx, ctx->ConstantInput(2, &literal_index)); - int32 split_dim; - OP_REQUIRES(ctx, index_shape.dims() == 0, - errors::InvalidArgument("split_dim input to Split Op must be a " - "scalar")); - split_dim = 
literal_index.Get({}); + int64 split_dim_orig; + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(2, &split_dim_orig)); + int64 split_dim = split_dim_orig < 0 ? split_dim_orig + input_shape.dims() + : split_dim_orig; + OP_REQUIRES(ctx, 0 <= split_dim && split_dim < input_shape.dims(), + errors::InvalidArgument("-input rank(-", input_shape.dims(), + ") <= split_dim < input rank (", + input_shape.dims(), "), but got ", + split_dim_orig)); xla::ComputationDataHandle input = ctx->Input(0); - const TensorShape input_shape = ctx->InputShape(0); OP_REQUIRES(ctx, input_shape.dims() > 0, errors::InvalidArgument("Can't split a 0 dimensional input")); - OP_REQUIRES( - ctx, 0 <= split_dim && split_dim < input_shape.dims(), - errors::InvalidArgument("0 <= split_dim < number of input dimensions (", - input_shape.dims(), "), but got ", split_dim)); - OP_REQUIRES( ctx, num_split > 0, errors::InvalidArgument( -- GitLab From 19538075bb174ba315a8b2711e60238b5fb92805 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 04:17:53 -0800 Subject: [PATCH 147/311] Clarify tutorials/image_retraining regarding the use of Mobilenets: - The feature depth multiplier controls the dimension of intermediate activations ("number of neurons"). The size of weight matrices depends on its square. - Quantization with TF-Lite only occurs when its TOCO tool is run on the module. That is out of scope here, so discussion of quantization gets replaced by links to TF-Lite and part 2 of the "Poets" codelab. 
PiperOrigin-RevId: 187307400 --- .../docs_src/tutorials/image_retraining.md | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tensorflow/docs_src/tutorials/image_retraining.md b/tensorflow/docs_src/tutorials/image_retraining.md index df15bc0a9c..246a420400 100644 --- a/tensorflow/docs_src/tutorials/image_retraining.md +++ b/tensorflow/docs_src/tutorials/image_retraining.md @@ -349,31 +349,32 @@ results, but if you intend to deploy your model on mobile devices or other resource-constrained environments you may want to trade off a little accuracy for much smaller file sizes or faster speeds. To help with that, the [retrain.py script](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/image_retraining/retrain.py) -supports 32 different variations on the [Mobilenet architecture](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html). +supports different variations on the [Mobilenet architecture](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html). These are a little less precise than Inception v3, but can result in far -smaller file sizes (down to less than a megabyte) and can be many times faster +smaller file sizes (a few megabytes) and can be many times faster to run. To train with one of these models, pass in the `--architecture` flag, for example: ``` python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos --architecture mobilenet_0.25_128_quantized + --image_dir ~/flower_photos --architecture mobilenet_0.25_128 ``` -This will create a 941KB model file in `/tmp/output_graph.pb`, with 25% of the -parameters of the full Mobilenet, taking 128x128 sized input images, and with -its weights quantized down to eight bits on disk. 
You can choose '1.0', '0.75', -'0.50', or '0.25' to control the number of weight parameters, and so the file -size (and to some extent the speed), '224', '192', '160', or '128' for the input -image size, with smaller sizes giving faster speeds, and an optional -'_quantized' at the end to indicate whether the file should contain 8-bit or -32-bit float weights. +This will create a 1.9MB model file in `/tmp/output_graph.pb`, with only 25% of +the number of neurons of the full Mobilenet, and trained to take 128x128 sized +input images. + +You can choose '1.0', '0.75', '0.50', or '0.25' to control the number of +neurons (activations of hidden layers); the number of weights (and hence to +some extent the file size and speed) shrinks like the square of that fraction. +You can choose '224', '192', '160', or '128' for the input image size, +with smaller sizes giving faster speeds. The speed and size advantages come at a loss to accuracy of course, but for many purposes this isn't critical. They can also be somewhat offset with improved training data. For example, training with distortions allows me to get above 80% -accuracy on the flower data set even with the 0.25/128/quantized graph above. +accuracy on the flower data set even with the 0.25/128 graph above. 
If you're going to be using the Mobilenet models in label_image or your own programs, you'll need to feed in an image of the specified size converted to a @@ -395,3 +396,9 @@ python tensorflow/examples/label_image/label_image.py \ --input_mean=128 --input_std=128 \ --image=$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg ``` + +For more information on deploying the retrained model to a mobile device, see +the [codelab version](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) +of this tutorial, especially [part 2](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2-tflite/#0), which describes +[TensorFlow Lite](/mobile/tflite/) and the additional optimizations it offers +(including quantization of model weights). -- GitLab From 6399c574c12fc58054dbd5989efde2e2d665e3d6 Mon Sep 17 00:00:00 2001 From: Dan Ringwalt Date: Wed, 28 Feb 2018 07:22:02 -0800 Subject: [PATCH 148/311] Replace deprecated _control_inputs with remove/add to avoid warnings. PiperOrigin-RevId: 187321605 --- tensorflow/contrib/graph_editor/reroute.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/graph_editor/reroute.py b/tensorflow/contrib/graph_editor/reroute.py index 7ffdbb7139..95c02a64d4 100644 --- a/tensorflow/contrib/graph_editor/reroute.py +++ b/tensorflow/contrib/graph_editor/reroute.py @@ -471,9 +471,10 @@ def remove_control_inputs(op, cops): if cop not in op.control_inputs: raise ValueError("{} is not a control_input of {}".format(op.name, cop.name)) + control_inputs = [cop for cop in op.control_inputs if cop not in cops] # pylint: disable=protected-access - op._control_inputs = [cop for cop in op._control_inputs if cop not in cops] - op._recompute_node_def() + op._remove_all_control_inputs() + op._add_control_inputs(control_inputs) # pylint: enable=protected-access @@ -496,9 +497,6 @@ def add_control_inputs(op, cops): if cop in op.control_inputs: raise ValueError("{} is already a control_input 
of {}".format(cop.name, op.name)) - # pylint: disable=protected-access - op._control_inputs += cops - op._recompute_node_def() - # pylint: enable=protected-access + op._add_control_inputs(cops) # pylint: disable=protected-access remove_undocumented(__name__, _allowed_symbols) -- GitLab From f48d3644d433a00733cbe44be67ef4e8ab2988e2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 07:40:08 -0800 Subject: [PATCH 149/311] Pass 'import_scope' when calling from_control_flow_context_def. PiperOrigin-RevId: 187323218 --- tensorflow/python/ops/control_flow_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index fb9e2188d7..215c6940df 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1790,7 +1790,7 @@ class CondContext(ControlFlowContext): ret.Enter() for nested_def in context_def.nested_contexts: - from_control_flow_context_def(nested_def) + from_control_flow_context_def(nested_def, import_scope=import_scope) ret.Exit() return ret -- GitLab From e5ab5347d695fe3f7f495864329c05a2ff8b512a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 09:54:19 -0800 Subject: [PATCH 150/311] Move Roadmap to a more prominent place. 
PiperOrigin-RevId: 187338696 --- tensorflow/docs_src/about/index.md | 1 - tensorflow/docs_src/about/leftnav_files | 1 - tensorflow/docs_src/community/index.md | 1 + tensorflow/docs_src/community/leftnav_files | 1 + tensorflow/docs_src/{about => community}/roadmap.md | 0 5 files changed, 2 insertions(+), 2 deletions(-) rename tensorflow/docs_src/{about => community}/roadmap.md (100%) diff --git a/tensorflow/docs_src/about/index.md b/tensorflow/docs_src/about/index.md index 5326b1e110..dc1e9af876 100644 --- a/tensorflow/docs_src/about/index.md +++ b/tensorflow/docs_src/about/index.md @@ -3,7 +3,6 @@ This section provides a few documents about TensorFlow itself, including the following: - * @{$roadmap$Roadmap}, which summarizes upcoming additions to TensorFlow. * @{$uses$TensorFlow in Use}, which provides a link to our model zoo and lists some popular ways that TensorFlow is being used. * @{$bib$TensorFlow White Papers}, which provides abstracts of white papers diff --git a/tensorflow/docs_src/about/leftnav_files b/tensorflow/docs_src/about/leftnav_files index 28f039e9b5..63763b9d9c 100644 --- a/tensorflow/docs_src/about/leftnav_files +++ b/tensorflow/docs_src/about/leftnav_files @@ -1,5 +1,4 @@ index.md -roadmap.md uses.md bib.md attribution.md diff --git a/tensorflow/docs_src/community/index.md b/tensorflow/docs_src/community/index.md index 8e67022648..b706d9b204 100644 --- a/tensorflow/docs_src/community/index.md +++ b/tensorflow/docs_src/community/index.md @@ -5,6 +5,7 @@ This section contains the following documents: * @{$welcome$Welcome to the TensorFlow Community}, which explains how you can get involved, where to report issues, and where to join like-minded TensorFlow enthusiasts online. + * @{$roadmap$Roadmap}, which summarizes upcoming additions to TensorFlow. * @{$documentation$Writing TensorFlow Documentation}, which explains TensorFlow's documentation conventions. If you are modifying TensorFlow source code or documentation, please read this guide. 
diff --git a/tensorflow/docs_src/community/leftnav_files b/tensorflow/docs_src/community/leftnav_files index c1595d3c95..fab35024ad 100644 --- a/tensorflow/docs_src/community/leftnav_files +++ b/tensorflow/docs_src/community/leftnav_files @@ -1,5 +1,6 @@ index.md welcome.md +roadmap.md documentation.md style_guide.md benchmarks.md diff --git a/tensorflow/docs_src/about/roadmap.md b/tensorflow/docs_src/community/roadmap.md similarity index 100% rename from tensorflow/docs_src/about/roadmap.md rename to tensorflow/docs_src/community/roadmap.md -- GitLab From 120fdaa4a2869a9bde183ec42398df527bbcc6e0 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Wed, 28 Feb 2018 09:59:49 -0800 Subject: [PATCH 151/311] BUILD file visibility change. END_PUBLIC RELNOTES: n/a BEGIN_PUBLIC Automated g4 rollback of changelist 187222292 PiperOrigin-RevId: 187339609 --- tensorflow/compiler/jit/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index af259e0564..c7c9e9bd7a 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -205,6 +205,7 @@ cc_library( name = "graph_to_functiondef", srcs = ["graph_to_functiondef.cc"], hdrs = ["graph_to_functiondef.h"], + visibility = [":friends"], deps = [ "//tensorflow/core:core_cpu", "//tensorflow/core:framework", -- GitLab From 3c9cd2576cb9b88b641b5e38248ca7e49aa5c50a Mon Sep 17 00:00:00 2001 From: MandarJKulkarni <33712629+MandarJKulkarni@users.noreply.github.com> Date: Thu, 1 Mar 2018 00:05:09 +0530 Subject: [PATCH 152/311] Fix typos in profiler.h (#16938) --- tensorflow/cc/profiler/profiler.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/cc/profiler/profiler.h b/tensorflow/cc/profiler/profiler.h index 6077c45c58..64edbb5766 100644 --- a/tensorflow/cc/profiler/profiler.h +++ b/tensorflow/cc/profiler/profiler.h @@ -61,18 +61,18 @@ class Profiler { /// Adds tracing information `run_meta` to profiler. 
A `run_meta` is /// generated by a TensorFlow session run call. `step` is the key /// to the `run_meta`. When calling ProfileXXX methods, caller can specify - /// `step` in `options` to seletively profile the corresponding `run_meta`. + /// `step` in `options` to selectively profile the corresponding `run_meta`. /// Multiple different `run_meta` can be keyed by the same `step` in order /// to group them together. void AddStep(int64 step, const RunMetadata& run_meta); /// Profiles the model by organizing nodes in graph structure. - /// Each node is an op and the nodes are contected by the op inputs/outputs. + /// Each node is an op and the nodes are connected by the op inputs/outputs. GraphNodeProto ProfileGraph(const Options& options); /// Profiles the model by organizing nodes in name scope structure. /// Each node is an op, and nodes are organized by the ops' name - /// scope, similar to a filesystem tree. + /// scope, similar to a file system tree. /// E.g. /foo is the root of operation /foo/matmul_1 and foo/conv_2. GraphNodeProto ProfileNameScope(const Options& options); -- GitLab From 12d8142dc1bb914fa3ff0a9029e9b6b71e36b9f5 Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Wed, 28 Feb 2018 10:43:36 -0800 Subject: [PATCH 153/311] [eager] Typo correction, there is no method `tf.data.Dataset.make_iterator`. 
PiperOrigin-RevId: 187347001 --- tensorflow/contrib/eager/python/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index d177bfeab2..36b7d6d009 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -71,7 +71,7 @@ class Iterator(object): if not context.in_eager_mode(): raise RuntimeError( "{} objects can only be used when eager execution is enabled, use " - "tf.data.Dataset.make_iterator or " + "tf.data.Dataset.make_initializable_iterator or " "tf.data.Dataset.make_one_shot_iterator for graph construction". format(type(self))) with ops.device("/device:CPU:0"): -- GitLab From 39a43c4f1d73b0210795d2003b127d3ffa284e98 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 28 Feb 2018 11:07:10 -0800 Subject: [PATCH 154/311] Introduce a ShapeUtil::ForEachIndexWithStatus, change index type to ArraySlice This is not used yet, but I need it in a later CL. I don't specifically need the argument to be an ArraySlice, but it seemed cleaner than taking a const ref to a vector. No functional change intended. 
PiperOrigin-RevId: 187352376 --- tensorflow/compiler/xla/literal_util.cc | 2 +- tensorflow/compiler/xla/literal_util.h | 2 +- tensorflow/compiler/xla/literal_util_test.cc | 30 +++++++-------- .../compiler/xla/service/hlo_evaluator.cc | 6 +-- tensorflow/compiler/xla/shape_util.h | 38 ++++++++++++++----- tensorflow/compiler/xla/shape_util_test.cc | 32 ++++++++++++++-- 6 files changed, 77 insertions(+), 33 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 823da43b5a..3962a9b316 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -223,7 +223,7 @@ Status Literal::CopySliceFromInternal( Literal::StrideConfig stride_config(src_literal.shape(), shape(), copy_size); - auto copy_proc = [&](const std::vector& indexes) { + auto copy_proc = [&](tensorflow::gtl::ArraySlice indexes) { // Map from multi-dimensional index, to source index. std::transform(indexes.begin(), indexes.end(), src_base.begin(), src_indexes.begin(), std::plus()); diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index d5ae3fd723..1d58f0cbc7 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -1269,7 +1269,7 @@ Status Literal::Populate(const FnType& generator) { int64 minor_dimension_size = ShapeUtil::GetDimension(this_shape, stride_config.minor_dimension); - auto init_function = [&](const std::vector& indexes) { + auto init_function = [&](tensorflow::gtl::ArraySlice indexes) { const int64 index = IndexUtil::MultidimensionalIndexToLinearIndex(shape(), indexes); std::copy(indexes.begin(), indexes.end(), minor_scan_indexes.begin()); diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index ee2f4fe874..9ff0771110 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -30,6 +30,7 @@ limitations under 
the License. namespace xla { namespace { +using tensorflow::gtl::ArraySlice; using ::testing::ElementsAre; using ::testing::HasSubstr; @@ -214,11 +215,11 @@ TEST_F(LiteralUtilTest, CreateSparse) { std::vector expected_values = {8, 9, 7, 10}; EXPECT_EQ(literal->sparse_indices()->data(), - tensorflow::gtl::ArraySlice( - expected_indices.data(), expected_indices.num_elements())); - EXPECT_EQ(tensorflow::gtl::ArraySlice(literal->data().data(), - expected_values.size()), - tensorflow::gtl::ArraySlice(expected_values)); + ArraySlice(expected_indices.data(), + expected_indices.num_elements())); + EXPECT_EQ( + ArraySlice(literal->data().data(), expected_values.size()), + ArraySlice(expected_values)); } TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) { @@ -290,7 +291,7 @@ TEST_F(LiteralUtilTest, EachCellR2F32) { // clang-format on std::vector> seen; literal->EachCellAsString( - [&seen](tensorflow::gtl::ArraySlice indices, const string& value) { + [&seen](ArraySlice indices, const string& value) { seen.emplace_back(indices[0], indices[1], value); }); @@ -622,11 +623,10 @@ TEST_F(LiteralUtilTest, TransposeR4) { // clang-format on auto reshape = original->Transpose(/*permutation=*/{2, 3, 0, 1}); - reshape->EachCell( - [&](tensorflow::gtl::ArraySlice indices, float value) { - EXPECT_EQ(value, original->Get( - {indices[2], indices[3], indices[0], indices[1]})); - }); + reshape->EachCell([&](ArraySlice indices, float value) { + EXPECT_EQ(value, original->Get( + {indices[2], indices[3], indices[0], indices[1]})); + }); } TEST_F(LiteralUtilTest, TestR4RelayoutEquivalence) { @@ -863,7 +863,7 @@ TEST_F(LiteralUtilTest, CopySliceFrom) { const int64 zero_base[] = {0, 0, 0, 0}; const int64 step[] = {1, 1, 1, 1}; uint32 seqnr = 0; - auto init_proc = [&](const std::vector& indexes) { + auto init_proc = [&](ArraySlice indexes) { source->Set(indexes, ++seqnr); return true; }; @@ -879,7 +879,7 @@ TEST_F(LiteralUtilTest, CopySliceFrom) { std::vector 
source_indexes(TF_ARRAYSIZE(dimensions), 0); std::vector blank_indexes(TF_ARRAYSIZE(dimensions), 0); bool matched = true; - auto check_proc = [&](const std::vector& indexes) { + auto check_proc = [&](ArraySlice indexes) { std::copy(indexes.begin(), indexes.end(), source_indexes.begin()); std::transform(source_indexes.begin(), source_indexes.end(), src_base, source_indexes.begin(), std::plus()); @@ -1067,7 +1067,7 @@ TEST_F(LiteralUtilTest, Populate) { primitive_util::NativeToPrimitiveType(), data.dimensions, data.layout); auto literal = Literal::CreateFromShape(shape); - auto generator = [&](tensorflow::gtl::ArraySlice indexes) -> uint32 { + auto generator = [&](ArraySlice indexes) -> uint32 { // Offsets from linear index just to avoid R0 literals to be initialized // with zero. return IndexUtil::MultidimensionalIndexToLinearIndex(literal->shape(), @@ -1079,7 +1079,7 @@ TEST_F(LiteralUtilTest, Populate) { std::vector zero_base(data.dimensions.size(), 0); std::vector step(data.dimensions.size(), 1); bool matched = true; - auto check_function = [&](const std::vector& indexes) { + auto check_function = [&](ArraySlice indexes) { auto value = literal->Get(indexes); matched = matched && (value == generator(indexes)); return matched; diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index c3a3251b7d..edb1ad2360 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1222,7 +1222,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { // corresponding index of the resulting padded literal. 
const PaddingConfig& pad_config = pad->padding_config(); - auto func = [&](const std::vector& input_index) { + auto func = [&](ArraySlice input_index) { for (auto i = 0; i < input_index.size(); ++i) { // Interior padding occurs logically before edge padding, so in the case // of negative edge padding elements are removed from the @@ -1518,7 +1518,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { base[result_to_arg_index[i]] = multi_index[i]; } - auto func = [&](const std::vector& input_index) { + auto func = [&](ArraySlice input_index) { auto curr_val = arg_literal.Get(input_index); // Evaluate computation with specified literal operands. @@ -1954,7 +1954,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { auto result = operand_literal.CloneToUnique(); std::vector result_index(ShapeUtil::Rank(result->shape()), 0); - auto func = [&](const std::vector& update_index) { + auto func = [&](ArraySlice update_index) { std::transform(update_index.begin(), update_index.end(), start.begin(), result_index.begin(), std::plus()); diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 8ee263fe5e..923315e001 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/primitive_util.h" +#include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -564,16 +565,16 @@ class ShapeUtil { // The visitor_function visitor function should return true if it wants to // continue, or false otherwise. // - // visitor_function must be a callable of type bool(const std::vector&) - // or compatible. + // visitor_function must be a callable of type + // StatusOr(ArraySlice) or compatible. 
template - static void ForEachIndex(const Shape& shape, - tensorflow::gtl::ArraySlice base, - tensorflow::gtl::ArraySlice count, - tensorflow::gtl::ArraySlice incr, - const FnType& visitor_function) { + static Status ForEachIndexWithStatus(const Shape& shape, + tensorflow::gtl::ArraySlice base, + tensorflow::gtl::ArraySlice count, + tensorflow::gtl::ArraySlice incr, + const FnType& visitor_function) { if (ShapeUtil::HasZeroElements(shape)) { - return; + return Status::OK(); } CHECK_EQ(Rank(shape), base.size()); CHECK_EQ(incr.size(), base.size()); @@ -583,7 +584,11 @@ class ShapeUtil { // once with the proper empty indexes. int64 n = -1; std::vector indexes(base.begin(), base.end()); - while (n < rank && visitor_function(indexes)) { + while (n < rank) { + TF_ASSIGN_OR_RETURN(bool should_continue, visitor_function(indexes)); + if (!should_continue) { + break; + } // Increments dimensions in minor to major order. for (n = 0; n < rank; ++n) { int64 dim = LayoutUtil::Minor(shape.layout(), n); @@ -594,6 +599,21 @@ class ShapeUtil { indexes[dim] = base[dim]; } } + + return Status::OK(); + } + + template + static void ForEachIndex(const Shape& shape, + tensorflow::gtl::ArraySlice base, + tensorflow::gtl::ArraySlice count, + tensorflow::gtl::ArraySlice incr, + const FnType& visitor_function) { + ForEachIndexWithStatus(shape, base, count, incr, + [&](tensorflow::gtl::ArraySlice indices) { + return StatusOr(visitor_function(indices)); + }) + .IgnoreError(); } private: diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc index 4db97d45b2..a357415698 100644 --- a/tensorflow/compiler/xla/shape_util_test.cc +++ b/tensorflow/compiler/xla/shape_util_test.cc @@ -573,10 +573,11 @@ TEST(ShapeUtilTest, ForEachIndex) { Shape shape = ShapeUtil::MakeShape(F32, data.dimensions); // Increments at every invocation. 
int invocations = 0; - auto increment_func = [&invocations](const std::vector& indexes) { - invocations++; - return true; - }; + auto increment_func = + [&invocations](tensorflow::gtl::ArraySlice indexes) { + invocations++; + return true; + }; std::vector zero_base(data.dimensions.size(), 0); std::vector step(data.dimensions.size(), 1); @@ -588,6 +589,29 @@ TEST(ShapeUtilTest, ForEachIndex) { } } +TEST(ShapeUtilTest, ForEachIndexWithStatus) { + Shape shape = ShapeUtil::MakeShape(F32, {10, 10}); + // Increments at every invocation. + int invocations = 0; + auto increment_func = + [&invocations]( + tensorflow::gtl::ArraySlice indexes) -> StatusOr { + if (++invocations == 5) { + return Unimplemented("Cannot increment beyond 5."); + } + return true; + }; + + Status error_status = ShapeUtil::ForEachIndexWithStatus( + shape, /*base=*/{0, 0}, /*count=*/{10, 10}, /*incr=*/{0, 1}, + increment_func); + + EXPECT_FALSE(error_status.ok()); + EXPECT_THAT(error_status.error_message(), + ::testing::HasSubstr("Cannot increment beyond 5.")); + EXPECT_EQ(invocations, 5); +} + TEST(ShapeUtilTest, DimensionsUnmodifiedByReshape_1x1x1x1_to_1x1x1) { // All output dimensions should be unmodified. One of the input dimensions is // modified because the input rank is larger by one. -- GitLab From 09d9715460bf4d0d0d2229816fe45eb81676a9ca Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 28 Feb 2018 11:50:17 -0800 Subject: [PATCH 155/311] Disable GRPC io utils test. 
PiperOrigin-RevId: 187360410 --- tensorflow/core/debug/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD index 40cb8353cd..f6fe9edb02 100644 --- a/tensorflow/core/debug/BUILD +++ b/tensorflow/core/debug/BUILD @@ -298,6 +298,9 @@ tf_cc_test( size = "small", srcs = ["debug_grpc_io_utils_test.cc"], linkstatic = tf_kernel_tests_linkstatic(), + tags = [ + "no_oss", # b/73962011 + ], deps = [ ":debug_graph_utils", ":debug_grpc_testlib", -- GitLab From 31421c3fa3a0585c01198458fa123c3493c21b62 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Wed, 28 Feb 2018 12:13:22 -0800 Subject: [PATCH 156/311] [XLA] Fix BF16 normalization to avoid the pass adding new unsupported mixed precision. Resolve unsupported input/output first, then resolve unsupported mixed precision. PiperOrigin-RevId: 187363969 --- .../xla/service/bfloat16_normalization.cc | 115 ++++++++++-------- .../service/bfloat16_normalization_test.cc | 33 ++++- 2 files changed, 95 insertions(+), 53 deletions(-) diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.cc b/tensorflow/compiler/xla/service/bfloat16_normalization.cc index b032c040e8..6176f5d209 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization.cc @@ -221,41 +221,37 @@ Status BFloat16NormalizationVisitor::HandleCrossReplicaSum( } Status BFloat16NormalizationVisitor::HandleInstruction(HloInstruction* hlo) { - std::vector bf16_operands; - std::vector f32_operands; - bool has_f32 = false; - bool has_bf16 = false; + int f32_count = 0; + int bf16_count = 0; for (int64 i = 0; i < hlo->operand_count(); ++i) { if (hlo->operand(i)->shape().element_type() == F32) { - f32_operands.push_back(i); - has_f32 = true; + f32_count += 1; } else if (hlo->operand(i)->shape().element_type() == BF16) { - bf16_operands.push_back(i); - has_bf16 = true; + bf16_count += 1; } } if (hlo->shape().element_type() == F32) { -
has_f32 = true; + f32_count += 1; } else if (hlo->shape().element_type() == BF16) { - has_bf16 = true; + bf16_count += 1; } std::vector bf16_called_comps; for (auto* comp : hlo->called_computations()) { bool comp_has_bf16 = false; if (comp->root_instruction()->shape().element_type() == F32) { - has_f32 = true; + f32_count += 1; } else if (comp->root_instruction()->shape().element_type() == BF16) { - has_bf16 = true; + bf16_count += 1; comp_has_bf16 = true; } for (auto* param : comp->parameter_instructions()) { if (param->shape().element_type() == F32) { - has_f32 = true; + f32_count += 1; } else if (param->shape().element_type() == BF16) { - has_bf16 = true; + bf16_count += 1; comp_has_bf16 = true; } } @@ -264,54 +260,69 @@ Status BFloat16NormalizationVisitor::HandleInstruction(HloInstruction* hlo) { } } - if (!bfloat16_support_->SupportsMixedPrecisions(*hlo) && has_bf16 && - has_f32) { - // Resolve unsupported mixed precision. - // - // See if we can change everything to BF16. - if (hlo->called_computations().empty() && - hlo->shape().element_type() == BF16) { - bool can_use_bf16 = true; - for (int i : f32_operands) { - if (bfloat16_support_->EffectiveOperandPrecisionIsOutputPrecision(*hlo, - i) && - bfloat16_support_->SupportsBF16Operand(*hlo, i)) { - continue; - } - can_use_bf16 = false; - break; - } - if (can_use_bf16) { - for (int i : f32_operands) { - TF_RETURN_IF_ERROR( - InsertConvertBeforeOperand(hlo, i, BF16, computation_)); - } - return Status::OK(); - } - } - if (hlo->shape().element_type() == BF16) { - TF_RETURN_IF_ERROR( - ChangeOutputTypeThenInsertConvertBack(hlo, F32, computation_)); - } - for (int i : bf16_operands) { - TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(hlo, i, F32, computation_)); - } - return ConvertCalledComputations(hlo, bf16_called_comps); - } - - for (int i : bf16_operands) { - if (!bfloat16_support_->SupportsBF16Operand(*hlo, i)) { + // Resolve unsupported BF16 operands. 
+ for (int i = 0; i < hlo->operand_count(); ++i) { + if (hlo->operand(i)->shape().element_type() == BF16 && + !bfloat16_support_->SupportsBF16Operand(*hlo, i)) { TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(hlo, i, F32, computation_)); + bf16_count -= 1; + f32_count += 1; } } + // Resolve unsupported BF16 output. if (hlo->shape().element_type() == BF16 && !bfloat16_support_->SupportsBF16Output(*hlo)) { TF_RETURN_IF_ERROR( ChangeOutputTypeThenInsertConvertBack(hlo, F32, computation_)); + bf16_count -= 1; + f32_count += 1; } - return Status::OK(); + // Resolve unsupported mixed precision after resolving unsupported BF16 + // operands and output, because the numbers of BF16 operands/output and F32 + // operands/output may have changed. + if (bfloat16_support_->SupportsMixedPrecisions(*hlo) || bf16_count == 0 || + f32_count == 0) { + return Status::OK(); + } + // See if we can change everything to BF16. + if (hlo->called_computations().empty() && + hlo->shape().element_type() == BF16) { + bool can_use_bf16 = true; + for (int i = 0; i < hlo->operand_count(); ++i) { + if (hlo->operand(i)->shape().element_type() == BF16) { + continue; + } + if ((bfloat16_support_->EffectiveOperandPrecisionIsBF16(*hlo, i) || + bfloat16_support_->EffectiveOperandPrecisionIsOutputPrecision(*hlo, + i)) && + bfloat16_support_->SupportsBF16Operand(*hlo, i)) { + continue; + } + can_use_bf16 = false; + break; + } + if (can_use_bf16) { + for (int i = 0; i < hlo->operand_count(); ++i) { + if (hlo->operand(i)->shape().element_type() == F32) { + TF_RETURN_IF_ERROR( + InsertConvertBeforeOperand(hlo, i, BF16, computation_)); + } + } + return Status::OK(); + } + } + if (hlo->shape().element_type() == BF16) { + TF_RETURN_IF_ERROR( + ChangeOutputTypeThenInsertConvertBack(hlo, F32, computation_)); + } + for (int i = 0; i < hlo->operand_count(); ++i) { + if (hlo->operand(i)->shape().element_type() == BF16) { + TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(hlo, i, F32, computation_)); + } + } + return 
ConvertCalledComputations(hlo, bf16_called_comps); } Status BFloat16NormalizationVisitor::DefaultAction(HloInstruction* hlo) { diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc index 66c3085842..fc0f6f1948 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc @@ -41,13 +41,17 @@ class TestBFloat16Support : public BFloat16Support { hlo.opcode() == HloOpcode::kGetTupleElement) { return true; } + if (hlo.opcode() == HloOpcode::kDot) { + // Test that only the first operand of kDot supports BF16. + return operand_index == 0; + } return false; } bool SupportsBF16Output(const HloInstruction& hlo) const override { if (hlo.opcode() == HloOpcode::kAdd || hlo.opcode() == HloOpcode::kReduce || hlo.opcode() == HloOpcode::kSubtract || - hlo.opcode() == HloOpcode::kTuple || + hlo.opcode() == HloOpcode::kDot || hlo.opcode() == HloOpcode::kTuple || hlo.opcode() == HloOpcode::kGetTupleElement) { return true; } @@ -245,4 +249,31 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleCrossReplicaSum) { EXPECT_EQ(ShapeUtil::GetSubshape(crs->shape(), {1}).element_type(), F32); } +// Tests that the normalization should not cause unsupported mixed precision due +// to resolving unsupported BF16 operand. 
+TEST_F(BFloat16NormalizationTest, DoNotAddUnsupportedMixedPrecision) { + auto builder = HloComputation::Builder(TestName()); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {4, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, bf16_shape, "a")); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_shape, "b")); + + HloInstruction* dot = builder.AddInstruction( + HloInstruction::CreateBinary(bf16_shape, HloOpcode::kDot, a, b)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(Normalize(module.get())); + + EXPECT_EQ(computation->root_instruction()->opcode(), HloOpcode::kConvert); + EXPECT_EQ(dot->shape().element_type(), F32); + EXPECT_EQ(dot->operand(0)->shape().element_type(), F32); + EXPECT_EQ(dot->operand(0)->opcode(), HloOpcode::kConvert); + EXPECT_EQ(dot->operand(1)->shape().element_type(), F32); + EXPECT_EQ(dot->operand(1)->opcode(), HloOpcode::kConvert); +} + } // namespace xla -- GitLab From 3dbbf740441cdd41b2dc998e09980d72d2e9d440 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Wed, 28 Feb 2018 12:14:03 -0800 Subject: [PATCH 157/311] In Grappler item builder, support inferring fetch nodes from signature defs. PiperOrigin-RevId: 187364078 --- .../core/grappler/grappler_item_builder.cc | 76 ++++++++++++++++--- .../grappler/grappler_item_builder_test.cc | 53 +++++++++++++ 2 files changed, 117 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index 606807b9e9..33ad426bbf 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -168,12 +168,6 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( // Fill in feed nodes from config, if any provided.
for (const auto& feed_node : cfg.feed_nodes) { const string feed_name = NodeName(feed_node); - if (feed_name.empty()) { - LOG(ERROR) << "Invalid feed node name " << feed_node - << ", skipping this input."; - return nullptr; - } - VLOG(1) << "Will use feed node " << feed_name; new_item->feed.emplace_back(feed_name, Tensor()); } @@ -182,17 +176,75 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( const CollectionDef& nodes = meta_graph.collection_def().at("train_op"); if (nodes.has_node_list()) { for (const auto& node : nodes.node_list().value()) { - const string name = NodeName(node); - if (name.empty()) { - LOG(ERROR) << "Invalid fetch node name " << node - << ", skipping this input"; + new_item->fetch.push_back(NodeName(node)); + } + } + } + + // Detect feed and fetch nodes from signature defs. + for (const auto& name_and_signature : meta_graph.signature_def()) { + for (const auto& name_and_input : name_and_signature.second.inputs()) { + const TensorInfo& input = name_and_input.second; + if (input.has_coo_sparse()) { + // Define the shapes following the comment of CooSparse. 
+ PartialTensorShape partial_shape_1d({-1}); + PartialTensorShape partial_shape_2d({-1, -1}); + TensorShape shape_1d; + TensorShape shape_2d; + if (!partial_shape_1d.AsTensorShape(&shape_1d) || + !partial_shape_2d.AsTensorShape(&shape_2d)) { + LOG(ERROR) << "Internal error when constructing tensor shapes."; return nullptr; } - VLOG(1) << "Will use fetch node " << name; - new_item->fetch.push_back(name); + + new_item->feed.emplace_back( + NodeName(input.coo_sparse().values_tensor_name()), + Tensor(input.dtype(), shape_1d)); + new_item->feed.emplace_back( + NodeName(input.coo_sparse().indices_tensor_name()), + Tensor(DT_INT64, shape_2d)); + new_item->feed.emplace_back( + NodeName(input.coo_sparse().dense_shape_tensor_name()), + Tensor(DT_INT64, shape_1d)); + } else { + new_item->feed.emplace_back( + NodeName(input.name()), + Tensor(input.dtype(), input.tensor_shape())); } } + for (const auto& name_and_output : name_and_signature.second.outputs()) { + const TensorInfo& output = name_and_output.second; + if (output.has_coo_sparse()) { + new_item->fetch.push_back( + NodeName(output.coo_sparse().values_tensor_name())); + new_item->fetch.push_back( + NodeName(output.coo_sparse().indices_tensor_name())); + new_item->fetch.push_back( + NodeName(output.coo_sparse().dense_shape_tensor_name())); + } else { + new_item->fetch.push_back(NodeName(output.name())); + } + } + } + + for (const auto& feed : new_item->feed) { + if (feed.first.empty()) { + LOG(ERROR) << "Invalid feed node name skipping this input"; + return nullptr; + } else { + VLOG(1) << "Will use feed node " << feed.first; + } + } + + for (const auto& fetch : new_item->fetch) { + if (fetch.empty()) { + LOG(ERROR) << "Invalid fetch node name skipping this input"; + return nullptr; + } else { + VLOG(1) << "Will use fetch node " << fetch; + } } + if (new_item->fetch.empty()) { LOG(ERROR) << "Failed to detect the fetch node(s), skipping this input"; return nullptr; diff --git 
a/tensorflow/core/grappler/grappler_item_builder_test.cc b/tensorflow/core/grappler/grappler_item_builder_test.cc index ef95992af7..78cbff6c90 100644 --- a/tensorflow/core/grappler/grappler_item_builder_test.cc +++ b/tensorflow/core/grappler/grappler_item_builder_test.cc @@ -280,6 +280,59 @@ TEST_F(GrapplerItemBuilderTest, GraphWithFunctions) { ASSERT_TRUE(item != nullptr); } +TEST_F(GrapplerItemBuilderTest, FromGraphWithSignatureDef) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto x = ops::Const(s.WithOpName("x"), 0); + auto y = ops::Const(s.WithOpName("y"), 1); + auto z = ops::Add(s.WithOpName("z"), x, y); + + MetaGraphDef meta_graph; + TF_CHECK_OK(s.ToGraphDef(meta_graph.mutable_graph_def())); + + TensorInfo input, output; + input.set_name("x"); + input.set_dtype(DT_FLOAT); + output.set_name("z"); + SignatureDef serving_signature; + (*serving_signature.mutable_inputs())["input"] = input; + (*serving_signature.mutable_outputs())["output"] = output; + (*meta_graph.mutable_signature_def())["serving"] = serving_signature; + + std::unique_ptr item = + GrapplerItemFromMetaGraphDef("0", meta_graph, ItemConfig()); + ASSERT_TRUE(item != nullptr); + + EXPECT_EQ(item->feed[0].first, "x"); + EXPECT_EQ(item->fetch[0], "z"); +} + +TEST_F(GrapplerItemBuilderTest, FromGraphWithIncompleteSignatureDef) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto x = ops::Const(s.WithOpName("x"), 0); + auto y = ops::Const(s.WithOpName("y"), 1); + + MetaGraphDef meta_graph; + TF_CHECK_OK(s.ToGraphDef(meta_graph.mutable_graph_def())); + + CollectionDef train_op; + train_op.mutable_node_list()->add_value("y"); + (*meta_graph.mutable_collection_def())["train_op"] = train_op; + + TensorInfo input, output; + input.set_name("x"); + input.set_dtype(DT_FLOAT); + // Its coo_sparse proto is incomplete. 
+ output.mutable_coo_sparse()->set_values_tensor_name("z"); + SignatureDef serving_signature; + (*serving_signature.mutable_inputs())["input"] = input; + (*serving_signature.mutable_outputs())["output"] = output; + (*meta_graph.mutable_signature_def())["serving"] = serving_signature; + + std::unique_ptr item = + GrapplerItemFromMetaGraphDef("0", meta_graph, ItemConfig()); + ASSERT_TRUE(item == nullptr); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From b07680459a88224fce83daa7b3b70bcc62b9c896 Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Thu, 1 Mar 2018 04:38:30 +0800 Subject: [PATCH 158/311] [Windows] Copy NominalCPUFrequency from Abseil (#16905) * [Windows] Copy NominalCPUFrequency from Abseil * Add #include --- tensorflow/core/platform/windows/port.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc index 582b232054..f3b27ea394 100644 --- a/tensorflow/core/platform/windows/port.cc +++ b/tensorflow/core/platform/windows/port.cc @@ -25,6 +25,7 @@ limitations under the License. #endif #include +#include #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/demangle.h" @@ -149,11 +150,16 @@ bool Snappy_Uncompress(const char* input, size_t length, char* output) { string Demangle(const char* mangled) { return mangled; } double NominalCPUFrequency() { -#ifdef TENSORFLOW_USE_ABSL - return absl::base_internal::NominalCPUFrequency(); -#else + DWORD data; + DWORD data_size = sizeof(data); + #pragma comment(lib, "shlwapi.lib") // For SHGetValue(). + if (SUCCEEDED( + SHGetValueA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", + "~MHz", nullptr, &data, &data_size))) { + return data * 1e6; // Value is MHz. 
+ } return 1.0; -#endif } int64 AvailableRam() { -- GitLab From 8a31fec675f3f1ade28a9a8f38cc8f72d9573256 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Wed, 28 Feb 2018 12:55:34 -0800 Subject: [PATCH 159/311] [XLA] FP16 Dot support for the CPU and GPU backends. Extend the stream interface ThenBlasGemmWithAlgorithm to support F16 matrix multiplication with computation type FP32. Extend the stream executor interface DoBlasGemmWithAlgorithm to support F16 GEMM with computation type FP32. Extend the CPU IR emitter to handle F16 Dot instruction, and add F16 matrix multiplication implementation to the CPU runtime. Extend the GPU backend to handle FP16 GEMM Thunk. Replicate the existing matrix multiplication test cases in matrix_ops_simple_test and dot_operation_test for FP16. RELNOTES: PiperOrigin-RevId: 187369731 --- tensorflow/compiler/xla/array.h | 45 +- tensorflow/compiler/xla/array2d.h | 17 +- tensorflow/compiler/xla/array3d.h | 9 +- tensorflow/compiler/xla/array4d.h | 9 +- tensorflow/compiler/xla/reference_util.cc | 56 +- tensorflow/compiler/xla/reference_util.h | 16 +- tensorflow/compiler/xla/service/cpu/BUILD | 1 - .../compiler/xla/service/cpu/cpu_runtime.cc | 4 + .../compiler/xla/service/cpu/cpu_runtime.h | 2 + .../xla/service/cpu/dot_op_emitter.cc | 9 +- .../compiler/xla/service/cpu/ir_emitter.cc | 2 +- .../xla/service/cpu/runtime_matmul.cc | 39 +- .../compiler/xla/service/cpu/runtime_matmul.h | 6 + .../xla/service/cpu/runtime_matvec.cc | 110 --- .../compiler/xla/service/cpu/runtime_matvec.h | 94 ++- .../cpu/runtime_single_threaded_matmul.cc | 36 +- .../cpu/runtime_single_threaded_matmul.h | 6 + .../xla/service/cpu/simple_orc_jit.cc | 2 + .../compiler/xla/service/gpu/gemm_thunk.cc | 25 +- .../xla/service/gpu/ir_emission_utils.cc | 6 +- tensorflow/compiler/xla/shape_util.h | 9 + tensorflow/compiler/xla/tests/BUILD | 1 + .../compiler/xla/tests/convolution_test.cc | 52 +- .../compiler/xla/tests/dot_operation_test.cc | 673 +++++++++--------- 
.../xla/tests/matrix_ops_simple_test.cc | 375 +++++----- tensorflow/stream_executor/blas.cc | 6 + tensorflow/stream_executor/blas.h | 2 + tensorflow/stream_executor/cuda/cuda_blas.cc | 47 +- 28 files changed, 868 insertions(+), 791 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/cpu/runtime_matvec.cc diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index 46ee4e64c9..24b58bec11 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -121,10 +121,31 @@ class Array { CHECK(idx == num_elements()); } - // Creates a 2D array of Eigen::half from the given nested initializer list of - // float values. + // Creates a 1D array of a floating-point type (half, bfloat16, float, + // or double) from an initializer list of float values. template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && + std::is_same::value>::type> + Array(std::initializer_list values) + : Array(ToInt64Vector({values.size()})) { + int64 idx = 0; + for (const auto& it1 : values) { + values_[idx] = static_cast(it1); + ++idx; + } + CHECK(idx == num_elements()); + } + + // Creates a 2D array of a floating-point type (half, bfloat16, float, + // or double) from an initializer list of float values. + template ::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array(std::initializer_list> values) : Array(ToInt64Vector({values.size(), values.begin()->size()})) { @@ -155,10 +176,13 @@ class Array { CHECK(idx == num_elements()); } - // Creates a 3D array of Eigen::half from the given nested initializer list of - // float values. + // Creates a 3D array of a floating-point type (half, bfloat16, float, + // or double) from an initializer list of float values. 
template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array(std::initializer_list>> values) @@ -196,10 +220,13 @@ class Array { CHECK(idx == num_elements()); } - // Creates a 4D array of Eigen::half from the given nested initializer list of - // float values. + // Creates a 4D array of a floating-point type (half, bfloat16, float, + // or double) from an initializer list of float values. template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array(std::initializer_list< std::initializer_list>>> diff --git a/tensorflow/compiler/xla/array2d.h b/tensorflow/compiler/xla/array2d.h index d30e78ecde..a17e81f448 100644 --- a/tensorflow/compiler/xla/array2d.h +++ b/tensorflow/compiler/xla/array2d.h @@ -53,10 +53,13 @@ class Array2D : public Array { Array2D(std::initializer_list> values) : Array(values) {} - // Creates an array of Eigen::half from the given nested initializer list of - // float values. + // Creates an array of a floating-point type (half, bfloat16, float, + // or double) from the given nested initializer list of float values. template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array2D(std::initializer_list> values) : Array(values) {} @@ -100,14 +103,16 @@ std::unique_ptr> MakeLinspaceArray2D(double from, double to, int64 n1, int64 n2) { auto array = MakeUnique>(n1, n2); int64 count = n1 * n2; - NativeT step = (count > 1) ? (to - from) / (count - 1) : 0.0f; + NativeT step = + static_cast((count > 1) ? 
(to - from) / (count - 1) : 0); auto set = [&array, n1, n2](int64 index, NativeT value) { (*array)(index / n2, index % n2) = value; }; for (int64 i = 0; i < count - 1; ++i) { - set(i, static_cast(from + i * step)); + set(i, (static_cast(from) + + static_cast(i) * static_cast(step))); } - set(count - 1, to); + set(count - 1, static_cast(to)); return array; } } // namespace xla diff --git a/tensorflow/compiler/xla/array3d.h b/tensorflow/compiler/xla/array3d.h index e5eb235d45..0e9a0722ae 100644 --- a/tensorflow/compiler/xla/array3d.h +++ b/tensorflow/compiler/xla/array3d.h @@ -57,10 +57,13 @@ class Array3D : public Array { values) : Array(values) {} - // Creates an array of Eigen::half from the given nested initializer list of - // float values. + // Creates an array of a floating-point type (half, bfloat16, float, + // or double) from the given nested initializer list of float values. template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array3D( std::initializer_list>> diff --git a/tensorflow/compiler/xla/array4d.h b/tensorflow/compiler/xla/array4d.h index cff70e54ba..a75fffc605 100644 --- a/tensorflow/compiler/xla/array4d.h +++ b/tensorflow/compiler/xla/array4d.h @@ -82,10 +82,13 @@ class Array4D : public Array { values) : Array(values) {} - // Creates an array of Eigen::half from the given nested initializer list of - // float values. + // Creates an array of a floating-point type (half, bfloat16, float, + // or double) from the given nested initializer list of float values. 
template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array4D(std::initializer_list>>> diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc index a9acdae380..8711b8aa2e 100644 --- a/tensorflow/compiler/xla/reference_util.cc +++ b/tensorflow/compiler/xla/reference_util.cc @@ -30,29 +30,23 @@ limitations under the License. namespace xla { -/* static */ std::unique_ptr> ReferenceUtil::TransposeArray2D( - const Array2D& operand) { - auto result = MakeUnique>(operand.width(), operand.height()); - for (int64 w = 0; w < operand.width(); ++w) { - for (int64 h = 0; h < operand.height(); ++h) { - (*result)(w, h) = operand(h, w); - } - } - - return result; -} - -/* static */ std::unique_ptr> ReferenceUtil::MatmulArray2D( - const Array2D& lhs, const Array2D& rhs) { +namespace { + +template +std::unique_ptr> MatmulArray2DImpl( + const Array2D& lhs, const Array2D& rhs, + const std::function& impl_fn) { CHECK_EQ(lhs.width(), rhs.height()); int m = lhs.height(); int n = rhs.width(); int k = lhs.width(); - auto result = MakeUnique>(m, n); + auto result = MakeUnique>(m, n); // Because Eigen is a header-oriented library, make sure that the Eigen code // is the same as the code used by the CPU backend (otherwise the linker will // randomly pick *some* definition). 
- __xla_cpu_runtime_EigenSingleThreadedMatMulF32( + impl_fn( /*run_options_ptr=*/nullptr, result->data(), rhs.data(), lhs.data(), n, m, k, /*transpose_lhs=*/0, @@ -60,22 +54,24 @@ namespace xla { return result; } +} // namespace + +/* static */ std::unique_ptr> ReferenceUtil::MatmulArray2D( + const Array2D& lhs, const Array2D& rhs) { + return MatmulArray2DImpl( + lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF16); +} + +/* static */ std::unique_ptr> ReferenceUtil::MatmulArray2D( + const Array2D& lhs, const Array2D& rhs) { + return MatmulArray2DImpl( + lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF32); +} + /* static */ std::unique_ptr> ReferenceUtil::MatmulArray2D( const Array2D& lhs, const Array2D& rhs) { - CHECK_EQ(lhs.width(), rhs.height()); - int m = lhs.height(); - int n = rhs.width(); - int k = lhs.width(); - auto result = MakeUnique>(m, n); - // Because Eigen is a header-oriented library, make sure that the Eigen code - // is the same as the code used by the CPU backend (otherwise the linker will - // randomly pick *some* definition). - __xla_cpu_runtime_EigenSingleThreadedMatMulF64( - /*run_options_ptr=*/nullptr, result->data(), rhs.data(), lhs.data(), n, m, - k, - /*transpose_lhs=*/0, - /*transpose_rhs=*/0); - return result; + return MatmulArray2DImpl( + lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF64); } /* static */ std::unique_ptr> ReferenceUtil::Array2DF32ToF64( diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h index 3ec96f2f38..57b0218882 100644 --- a/tensorflow/compiler/xla/reference_util.h +++ b/tensorflow/compiler/xla/reference_util.h @@ -39,10 +39,22 @@ namespace xla { class ReferenceUtil { public: // Returns the result of a transpose operation on the input matrix. 
- static std::unique_ptr> TransposeArray2D( - const Array2D& operand); + template + static std::unique_ptr> TransposeArray2D( + const Array2D& operand) { + auto result = MakeUnique>(operand.width(), operand.height()); + for (int64 w = 0; w < operand.width(); ++w) { + for (int64 h = 0; h < operand.height(); ++h) { + (*result)(w, h) = operand(h, w); + } + } + + return result; + } // Returns the result of a matrix multiply `lhs x rhs`. + static std::unique_ptr> MatmulArray2D( + const Array2D& lhs, const Array2D& rhs); static std::unique_ptr> MatmulArray2D( const Array2D& lhs, const Array2D& rhs); static std::unique_ptr> MatmulArray2D( diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 32be0b0c96..4170e31527 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -514,7 +514,6 @@ cc_library( cc_library( name = "runtime_matvec", - srcs = ["runtime_matvec.cc"], hdrs = ["runtime_matvec.h"], copts = runtime_copts(), deps = [ diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc index 40ace96327..9a3bd68c80 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc @@ -31,6 +31,8 @@ XfeedManager* GetXfeedManager() { return manager; } +extern const char* const kEigenMatMulF16SymbolName = + "__xla_cpu_runtime_EigenMatMulF16"; extern const char* const kEigenMatMulF32SymbolName = "__xla_cpu_runtime_EigenMatMulF32"; extern const char* const kEigenMatMulF64SymbolName = @@ -40,6 +42,8 @@ extern const char* const kEigenConvF16SymbolName = extern const char* const kEigenConvF32SymbolName = "__xla_cpu_runtime_EigenConvF32"; extern const char* const kEigenFftSymbolName = "__xla_cpu_runtime_EigenFft"; +extern const char* const kEigenSingleThreadedMatMulF16SymbolName = + "__xla_cpu_runtime_EigenSingleThreadedMatMulF16"; extern const char* const 
kEigenSingleThreadedMatMulF32SymbolName = "__xla_cpu_runtime_EigenSingleThreadedMatMulF32"; extern const char* const kEigenSingleThreadedMatMulF64SymbolName = diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h index 2141dfe1ce..e61d6ea28b 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h @@ -41,11 +41,13 @@ namespace runtime { // the actual symbol. // 2. When using ahead-of-time compilation, the linker can resolve the name // because it is a symbol in the cpu_runtime library. +extern const char* const kEigenMatMulF16SymbolName; extern const char* const kEigenMatMulF32SymbolName; extern const char* const kEigenMatMulF64SymbolName; extern const char* const kEigenConvF16SymbolName; extern const char* const kEigenConvF32SymbolName; extern const char* const kEigenFftSymbolName; +extern const char* const kEigenSingleThreadedMatMulF16SymbolName; extern const char* const kEigenSingleThreadedMatMulF32SymbolName; extern const char* const kEigenSingleThreadedMatMulF64SymbolName; extern const char* const kEigenSingleThreadedConvF16SymbolName; diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index cfe7c9c3af..6f06256e08 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -919,6 +919,12 @@ tensorflow::Status DotOpEmitter::EmitCallToRuntime() { llvm::Type* float_type; const char* fn_name; switch (type) { + case F16: + fn_name = multi_threaded_eigen + ? runtime::kEigenMatMulF16SymbolName + : runtime::kEigenSingleThreadedMatMulF16SymbolName; + float_type = ir_builder_->getHalfTy(); + break; case F32: fn_name = multi_threaded_eigen ? 
runtime::kEigenMatMulF32SymbolName @@ -1051,7 +1057,8 @@ static bool AreValidGemmShapes(const Shape& lhs_shape, const Shape& rhs_shape, // The inputs and the output must // 1) be matrices with no padding, and // 2) have an allowed element type. - return output_shape.element_type() == F32 && + PrimitiveType output_primitive_type = output_shape.element_type(); + return (output_primitive_type == F32 || output_primitive_type == F16) && IsRank2WithNoPadding(lhs_shape) && IsRank2WithNoPadding(rhs_shape) && IsRank2WithNoPadding(output_shape); } diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 4dffaee87f..3b8056d505 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -2074,7 +2074,7 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { TF_RETURN_IF_ERROR(ElementTypesSameAndSupported( /*instruction=*/*root, /*operands=*/{lhs, rhs}, - /*supported_types=*/{F32})); + /*supported_types=*/{F16, F32})); llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs)); llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs)); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc b/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc index bff57d33ae..39b13183ff 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc @@ -63,30 +63,41 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, C.device(*run_options->intra_op_thread_pool()) = A.contract(B, dims); } +template +void MatMulImpl(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, + int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { + if (m == 1 || n == 1) { + // Despite being single threaded, this version of matrix * vector is faster. 
+ xla::EigenMatVec(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); + } else { + MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, + transpose_rhs); + } +} + } // namespace +void __xla_cpu_runtime_EigenMatMulF16(const void* run_options_ptr, + Eigen::half* out, Eigen::half* lhs, + Eigen::half* rhs, int64 m, int64 n, + int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + MatMulImpl(run_options_ptr, out, lhs, rhs, m, n, k, + transpose_lhs, transpose_rhs); +} + void __xla_cpu_runtime_EigenMatMulF32(const void* run_options_ptr, float* out, float* lhs, float* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - if (m == 1 || n == 1) { - // Despite being single threaded, this version of matrix * vector is faster. - xla::EigenMatVecF32(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); - } else { - MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, - transpose_rhs); - } + MatMulImpl(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, + transpose_rhs); } void __xla_cpu_runtime_EigenMatMulF64(const void* run_options_ptr, double* out, double* lhs, double* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - if (m == 1 || n == 1) { - // Despite being single threaded, this version of matrix * vector is faster. - xla::EigenMatVecF64(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); - } else { - MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, - transpose_rhs); - } + MatMulImpl(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, + transpose_rhs); } diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matmul.h b/tensorflow/compiler/xla/service/cpu/runtime_matmul.h index fdb644651d..b5156434f6 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_matmul.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_matmul.h @@ -25,6 +25,12 @@ extern "C" { // order. 'out' is a pointer to a buffer sufficiently large to hold the result // of the operation. 
Following standard nomenclature: lhs is m x k, // rhs is k x n, and out is m x n. +extern void __xla_cpu_runtime_EigenMatMulF16( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, + Eigen::half* out, Eigen::half* lhs, Eigen::half* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs); + extern void __xla_cpu_runtime_EigenMatMulF32( const void* /* xla::ExecutableRunOptions* */ run_options_ptr, float* out, float* lhs, float* rhs, tensorflow::int64 m, tensorflow::int64 n, diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matvec.cc b/tensorflow/compiler/xla/service/cpu/runtime_matvec.cc deleted file mode 100644 index 435820cdd3..0000000000 --- a/tensorflow/compiler/xla/service/cpu/runtime_matvec.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include - -#include "third_party/eigen3/Eigen/Core" -#include "tensorflow/compiler/xla/service/cpu/runtime_matvec.h" - -using tensorflow::int32; -using tensorflow::int64; - -namespace { - -// Does mat * x or mat^T * x. -template -void MatVec(T* out_buf, T* mat_buf, T* x_buf, int64 rows, int64 cols, - int32 transpose) { - // Use an Eigen Matrix instead of a Tensor, as the GEMV from Matrix seems to - // be faster (b/30223679). 
See also: the matmul op kernel in TensorFlow, - // which implements the same optimization. - using Matrix = Eigen::Matrix; - using MatrixMap = Eigen::Map; - - using Vector = Eigen::Matrix; - using VectorMap = Eigen::Map; - - auto x = VectorMap(x_buf, cols); - auto out = VectorMap(out_buf, rows); - - int64 mat_rows = rows; - int64 mat_cols = cols; - - if (transpose) { - std::swap(mat_rows, mat_cols); - } - - auto mat = MatrixMap(mat_buf, mat_rows, mat_cols); - - if (transpose) { - out = mat.transpose() * x; - } else { - out = mat * x; - } -} - -// Converts matmul-style args to matvec. -template -void DispatchMatVec(T* out, T* lhs, T* rhs, int64 m, int64 n, int64 k, - int32 transpose_lhs, int32 transpose_rhs) { - // If the input is in the form x * A, where x is the vector, then bring A back - // over to the left hand side. We make use of the identity - // - // (x * A)^T = A^T * x^T - // - // We do not need to take the transpose of x or of the result since taking - // the transpose of a vector does not change the memory layout. 
- const int64 cols = k; - - T* mat; - T* vec; - int64 rows; - bool transpose_mat; - - bool is_mat_vec = (n == 1); - - if (is_mat_vec) { - mat = lhs; - vec = rhs; - rows = m; - transpose_mat = transpose_lhs; - } else { - mat = rhs; - vec = lhs; - rows = n; - transpose_mat = !transpose_rhs; - } - - MatVec(out, mat, vec, rows, cols, transpose_mat); -} - -} // namespace - -namespace xla { - -void EigenMatVecF32(float* out, float* lhs, float* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, int32 transpose_rhs) { - assert((m == 1 || n == 1) && "not a matrix-vector multiply"); - DispatchMatVec(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); -} - -void EigenMatVecF64(double* out, double* lhs, double* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, int32 transpose_rhs) { - assert((m == 1 || n == 1) && "not a matrix-vector multiply"); - DispatchMatVec(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); -} - -} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matvec.h b/tensorflow/compiler/xla/service/cpu/runtime_matvec.h index 1bd8dfb377..70eb98c541 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_matvec.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_matvec.h @@ -16,10 +16,86 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATVEC_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATVEC_H_ +#include "third_party/eigen3/Eigen/Core" + #include "tensorflow/core/platform/types.h" namespace xla { +namespace detail { + +using tensorflow::int32; +using tensorflow::int64; + +// Does mat * x or mat^T * x. +template +void MatVec(T* out_buf, T* mat_buf, T* x_buf, int64 rows, int64 cols, + int32 transpose) { + // Use an Eigen Matrix instead of a Tensor, as the GEMV from Matrix seems to + // be faster (b/30223679). See also: the matmul op kernel in TensorFlow, + // which implements the same optimization. 
+ using Matrix = Eigen::Matrix; + using MatrixMap = Eigen::Map; + + using Vector = Eigen::Matrix; + using VectorMap = Eigen::Map; + + auto x = VectorMap(x_buf, cols); + auto out = VectorMap(out_buf, rows); + + int64 mat_rows = rows; + int64 mat_cols = cols; + + if (transpose) { + std::swap(mat_rows, mat_cols); + } + + auto mat = MatrixMap(mat_buf, mat_rows, mat_cols); + + if (transpose) { + out = mat.transpose() * x; + } else { + out = mat * x; + } +} + +// Converts matmul-style args to matvec. +template +void DispatchMatVec(T* out, T* lhs, T* rhs, int64 m, int64 n, int64 k, + int32 transpose_lhs, int32 transpose_rhs) { + // If the input is in the form x * A, where x is the vector, then bring A back + // over to the left hand side. We make use of the identity + // + // (x * A)^T = A^T * x^T + // + // We do not need to take the transpose of x or of the result since taking + // the transpose of a vector does not change the memory layout. + const int64 cols = k; + + T* mat; + T* vec; + int64 rows; + bool transpose_mat; + + bool is_mat_vec = (n == 1); + + if (is_mat_vec) { + mat = lhs; + vec = rhs; + rows = m; + transpose_mat = transpose_lhs; + } else { + mat = rhs; + vec = lhs; + rows = n; + transpose_mat = !transpose_rhs; + } + + MatVec(out, mat, vec, rows, cols, transpose_mat); +} + +} // namespace detail + // Performs a matrix-vector multiplication using Eigen. 'lhs' and 'rhs' are // pointers to buffers containing input matrices in column-major order. 'out' is // a pointer to a buffer sufficiently large to hold the result of the @@ -30,15 +106,15 @@ namespace xla { // // TODO(b/64684907): Compare runtime performance of these functions with dot // simplification. 
-void EigenMatVecF32(float* out, float* lhs, float* rhs, tensorflow::int64 m, - tensorflow::int64 n, tensorflow::int64 k, - tensorflow::int32 transpose_lhs, - tensorflow::int32 transpose_rhs); - -void EigenMatVecF64(double* out, double* lhs, double* rhs, tensorflow::int64 m, - tensorflow::int64 n, tensorflow::int64 k, - tensorflow::int32 transpose_lhs, - tensorflow::int32 transpose_rhs); +template +void EigenMatVec(T* out, T* lhs, T* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { + assert((m == 1 || n == 1) && "not a matrix-vector multiply"); + detail::DispatchMatVec(out, lhs, rhs, m, n, k, transpose_lhs, + transpose_rhs); +} } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc index ee8eb08155..17303e2f0d 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc @@ -57,26 +57,38 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, C = A.contract(B, dims); } +template +void SingleThreadedMatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, + int64 m, int64 n, int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + if (m == 1 || n == 1) { + xla::EigenMatVec(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); + } else { + MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, + transpose_rhs); + } +} + } // namespace +void __xla_cpu_runtime_EigenSingleThreadedMatMulF16( + const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs, + Eigen::half* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + SingleThreadedMatMul(run_options_ptr, out, lhs, rhs, m, n, k, + transpose_lhs, transpose_rhs); +} + void __xla_cpu_runtime_EigenSingleThreadedMatMulF32( const void* run_options_ptr, float* 
out, float* lhs, float* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - if (m == 1 || n == 1) { - xla::EigenMatVecF32(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); - } else { - MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, - transpose_rhs); - } + SingleThreadedMatMul(run_options_ptr, out, lhs, rhs, m, n, k, + transpose_lhs, transpose_rhs); } void __xla_cpu_runtime_EigenSingleThreadedMatMulF64( const void* run_options_ptr, double* out, double* lhs, double* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - if (m == 1 || n == 1) { - xla::EigenMatVecF64(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); - } else { - MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, - transpose_rhs); - } + SingleThreadedMatMul(run_options_ptr, out, lhs, rhs, m, n, k, + transpose_lhs, transpose_rhs); } diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h index 029eb95142..9371a62242 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h @@ -25,6 +25,12 @@ extern "C" { // 'out' is a pointer to a buffer sufficiently large to hold the result of the // operation. Following standard nomenclature: lhs is m x k, rhs is k x n, and // out is m x n. 
+extern void __xla_cpu_runtime_EigenSingleThreadedMatMulF16( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, + Eigen::half* out, Eigen::half* lhs, Eigen::half* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs); + extern void __xla_cpu_runtime_EigenSingleThreadedMatMulF32( const void* /* xla::ExecutableRunOptions* */ run_options_ptr, float* out, float* lhs, float* rhs, tensorflow::int64 m, tensorflow::int64 n, diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index e8a375d637..80c24eaccf 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -181,10 +181,12 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenFft); + REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); + REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin); diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc index ba482793e7..ca54b2eed8 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc @@ -108,11 +108,13 @@ bool DoGemmWithAlgorithm(MatrixDescriptor lhs_matrix, return stream ->ThenBlasGemmWithAlgorithm( lhs_transpose, rhs_transpose, output_matrix.num_rows, - output_matrix.num_cols, /*size of reduce dim=*/k, /*alpha=*/1.0, - lhs_data, /*leading dim of 
LHS=*/lhs_matrix.num_rows, rhs_data, - /*leading dim of RHS=*/rhs_matrix.num_rows, /*beta=*/0.0, - &output_data, /*leading dim of output=*/output_matrix.num_rows, - computation_type, algorithm, output_profile_result) + output_matrix.num_cols, /*size of reduce dim=*/k, + /*alpha=*/static_cast(1.0f), lhs_data, + /*leading dim of LHS=*/lhs_matrix.num_rows, rhs_data, + /*leading dim of RHS=*/rhs_matrix.num_rows, + /*beta=*/static_cast(0.0f), &output_data, + /*leading dim of output=*/output_matrix.num_rows, computation_type, + algorithm, output_profile_result) .ok(); } @@ -161,6 +163,8 @@ StatusOr DoGemmAutotune( // DoGemm/DoGemmWithAlgorithm/DoGemmAutotune. auto GetGemmFn(PrimitiveType type) -> decltype(&DoGemm) { switch (type) { + case F16: + return &DoGemm; case F32: return &DoGemm; case F64: @@ -172,6 +176,8 @@ auto GetGemmFn(PrimitiveType type) -> decltype(&DoGemm) { auto GetGemmWithAlgorithmFn(PrimitiveType type) -> decltype(&DoGemmWithAlgorithm) { switch (type) { + case F16: + return &DoGemmWithAlgorithm; case F32: return &DoGemmWithAlgorithm; case F64: @@ -182,6 +188,8 @@ auto GetGemmWithAlgorithmFn(PrimitiveType type) } auto GetGemmAutotuneFn(PrimitiveType type) -> decltype(&DoGemmAutotune) { switch (type) { + case F16: + return &DoGemmAutotune; case F32: return &DoGemmAutotune; case F64: @@ -196,6 +204,10 @@ auto GetGemmAutotuneFn(PrimitiveType type) -> decltype(&DoGemmAutotune) { // separately from the precision of the inputs and result. se::blas::ComputationType GetBlasComputationType(PrimitiveType type) { switch (type) { + case F16: + // Use F32 as computation type for F16 as we currently only implement the + // cuDNN pseudo half configuration for half precision. + return se::blas::ComputationType::kF32; case F32: return se::blas::ComputationType::kF32; case F64: @@ -315,6 +327,9 @@ tensorflow::Status GemmThunk::ExecuteOnStream( stream, /*output_profile_result=*/nullptr); } + + // Autotune will fail when CUDA 8 and GPU sm_50 or older are used. 
+ // Use the older Gemm API in this case. return GetGemmFn(element_type)(lhs_matrix, rhs_matrix, output_matrix, stream); }; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index 2f65edffea..1b89dfa7ae 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -49,8 +49,10 @@ bool AreValidGemmShapes(const Shape& lhs_shape, const Shape& rhs_shape, // The inputs and the output must // 1) be matrices with no padding and a non-zero number of elements, // 2) have an allowed element type. - bool type_is_allowed = (output_shape.element_type() == F32 || - output_shape.element_type() == F64); + PrimitiveType output_primitive_type = output_shape.element_type(); + bool type_is_allowed = + (output_primitive_type == F16 || output_primitive_type == F32 || + output_primitive_type == F64); return type_is_allowed && IsRank2WithNoPadding(lhs_shape) && IsRank2WithNoPadding(rhs_shape) && IsRank2WithNoPadding(output_shape) && diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 923315e001..fb66f69709 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -321,6 +321,15 @@ class ShapeUtil { static Shape MakeShape(PrimitiveType element_type, tensorflow::gtl::ArraySlice dimensions); + // Creates a Shape with element type corresponding to T and the given + // dimensions + template + static Shape MakeShapeWithType( + tensorflow::gtl::ArraySlice dimensions) { + return ShapeUtil::MakeShape(primitive_util::NativeToPrimitiveType(), + dimensions); + } + // Constructs a new shape with the given minor_to_major order in its Layout. // Returns a value shape such that shape.has_layout(). 
static Shape MakeShapeWithLayout( diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index dc282f2440..63f4a4430f 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1299,6 +1299,7 @@ xla_test( "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "//tensorflow/core:test", diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index e2b5c91653..99640f5bb5 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -53,26 +53,12 @@ class ConvolutionTest : public ClientLibraryTestBase { #endif }; -#if (XLA_TEST_BACKEND_GPU || XLA_TEST_BACKEND_CPU) -using TestTypes = ::testing::Types; -#else +#ifdef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 using TestTypes = ::testing::Types; +#else +using TestTypes = ::testing::Types; #endif -template -Shape MakeShapeWrapper(tensorflow::gtl::ArraySlice dimensions); - -template <> -Shape MakeShapeWrapper(tensorflow::gtl::ArraySlice dimensions) { - return ShapeUtil::MakeShape(F32, dimensions); -} - -template <> -Shape MakeShapeWrapper( - tensorflow::gtl::ArraySlice dimensions) { - return ShapeUtil::MakeShape(F16, dimensions); -} - template class ForwardPassConvolution_3x3x256_256_OutputZ_Iota : public ConvolutionTest { public: @@ -121,8 +107,8 @@ class Convolve_1x1x1x2_1x1x1x2_Valid : public ConvolutionTest { public: void RunTest() { ComputationBuilder builder(client_, TestName()); - Shape input_shape = MakeShapeWrapper({1, 1, 1, 2}); - Shape filter_shape = MakeShapeWrapper({1, 1, 1, 2}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 1, 2}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 
1, 1, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); @@ -152,8 +138,8 @@ class Convolve_1x1x4x4_1x1x2x2_Valid : public ConvolutionTest { public: void RunTest() { ComputationBuilder builder(client_, TestName()); - Shape input_shape = MakeShapeWrapper({1, 1, 4, 4}); - Shape filter_shape = MakeShapeWrapper({1, 1, 2, 2}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 1, 2, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); @@ -186,8 +172,8 @@ class Convolve_1x1x4x4_1x1x2x2_Same : public ConvolutionTest { public: void RunTest() { ComputationBuilder builder(client_, TestName()); - Shape input_shape = MakeShapeWrapper({1, 1, 4, 4}); - Shape filter_shape = MakeShapeWrapper({1, 1, 2, 2}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 1, 2, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); @@ -222,8 +208,8 @@ class Convolve_1x1x4x4_1x1x3x3_Same : public ConvolutionTest { public: void RunTest() { ComputationBuilder builder(client_, TestName()); - Shape input_shape = MakeShapeWrapper({1, 1, 4, 4}); - Shape filter_shape = MakeShapeWrapper({1, 1, 3, 3}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 1, 3, 3}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); @@ -280,8 +266,8 @@ class 
Convolve1D_1x2x5_1x2x2_WithRHSDilation : public ConvolutionTest { void RunTest() { ComputationBuilder builder(client_, TestName()); { - Shape input_shape = MakeShapeWrapper({1, 2, 5}); - Shape filter_shape = MakeShapeWrapper({1, 2, 2}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 2, 5}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 2, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); // Convolution dimensions are bf0_oi0->bo0. @@ -381,8 +367,8 @@ class Convolve1D_1x2x5_1x2x2_WithPadding : public ConvolutionTest { void RunTest() { ComputationBuilder builder(client_, TestName()); { - Shape input_shape = MakeShapeWrapper({1, 2, 5}); - Shape filter_shape = MakeShapeWrapper({1, 2, 2}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 2, 5}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 2, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); // Convolution dimensions are bf0_oi0->bo0. 
@@ -486,8 +472,8 @@ class Convolve2D_1x3x3x5_3x3x5x5_Valid : public ConvolutionTest { ComputationBuilder builder(client_, TestName()); std::vector input_dims = {1, 3, 3, 5}; std::vector filter_dims = {3, 3, 5, 3}; - Shape input_shape = MakeShapeWrapper(input_dims); - Shape filter_shape = MakeShapeWrapper(filter_dims); + Shape input_shape = ShapeUtil::MakeShapeWithType(input_dims); + Shape filter_shape = ShapeUtil::MakeShapeWithType(filter_dims); { auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); @@ -611,8 +597,8 @@ class Convolve1D1WindowTestBase input_feature}; std::vector filter_dims = {window_size, input_feature, output_feature}; - Shape input_shape = MakeShapeWrapper(input_dims); - Shape filter_shape = MakeShapeWrapper(filter_dims); + Shape input_shape = ShapeUtil::MakeShapeWithType(input_dims); + Shape filter_shape = ShapeUtil::MakeShapeWithType(filter_dims); { auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 815962094a..09b1dd283e 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -34,169 +34,194 @@ limitations under the License. namespace xla { namespace { -// TODO(b/34468543): use GUnit typed tests when we can do all tests on all -// backends. 
class DotOperationTest : public ClientLibraryTestBase { public: ErrorSpec error_spec_{0.0001, 1e-5}; - - protected: - template - void TestOneElementVectorDot(); - template - void TestVectorDot(); - template - void TestSquareMatrixDot(bool lhs_row_major = false, - bool rhs_row_major = false); - template - void TestNonsquareMatrixDot(bool lhs_row_major = false, - bool rhs_row_major = false); }; -XLA_TEST_F(DotOperationTest, ZeroElementVectorDotF32) { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR1({}); - auto rhs = builder.ConstantR1({}); +#if defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16) && \ + defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT64) +using TypesF16F32 = ::testing::Types; +using TypesF16F32F64 = ::testing::Types; +using TypesF16F32F64CF64 = ::testing::Types; +#elif !defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16) && \ + !defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT64) +using TypesF16F32 = ::testing::Types; +using TypesF16F32F64 = ::testing::Types; +using TypesF16F32F64CF64 = + ::testing::Types; +#else +#error "Situation not handled yet" +#endif + +template +class DotOperationTest_F16F32F64CF64 : public DotOperationTest {}; +TYPED_TEST_CASE(DotOperationTest_F16F32F64CF64, TypesF16F32F64CF64); + +XLA_TYPED_TEST(DotOperationTest_F16F32F64CF64, ZeroElementVectorDot) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + + auto lhs = builder.ConstantR1({}); + auto rhs = builder.ConstantR1({}); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR0(&builder, 0.0, {}, error_spec_); + this->template ComputeAndCompareR0(&builder, static_cast(0.0), {}, + this->error_spec_); } -XLA_TEST_F(DotOperationTest, TrivialMatrixVectorDotF32) { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR2({{3.0, 4.0}}); - auto rhs = builder.ConstantR1({3.0, 4.0}); - auto result = builder.Dot(lhs, rhs); +template +class DotOperationTest_F16F32F64 : public DotOperationTest {}; 
+TYPED_TEST_CASE(DotOperationTest_F16F32F64, TypesF16F32F64); - ComputeAndCompareR1(&builder, {25.0}, {}, error_spec_); -} - -template -void DotOperationTest::TestOneElementVectorDot() { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR1({2.0}); - auto rhs = builder.ConstantR1({3.0}); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, TrivialMatrixVectorDot) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR2FromArray2D({{3.0f, 4.0f}}); + auto rhs = builder.ConstantFromArray({3.0f, 4.0f}); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR0(&builder, 6.0, {}, error_spec_); + this->template ComputeAndCompareR1(&builder, {static_cast(25.0f)}, {}, + this->error_spec_); } -XLA_TEST_F(DotOperationTest, OneElementVectorDotF32) { - TestOneElementVectorDot(); -} +XLA_TYPED_TEST(DotOperationTest_F16F32F64, OneElementVectorDot) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR1({static_cast(2.0f)}); + auto rhs = builder.ConstantR1({static_cast(3.0f)}); + auto result = builder.Dot(lhs, rhs); -XLA_TEST_F(DotOperationTest, OneElementVectorDotF64) { - TestOneElementVectorDot(); + this->template ComputeAndCompareR0(&builder, static_cast(6.0f), {}, + this->error_spec_); } -template -void DotOperationTest::TestVectorDot() { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR1({1.0, 2.5, 42.0}); - auto rhs = builder.ConstantR1({11.0, -1.0, 0.5}); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, VectorDot) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantFromArray({1.0f, 2.5f, 42.0f}); + auto rhs = builder.ConstantFromArray({11.0f, -1.0f, 0.5f}); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR0(&builder, 29.5, {}, error_spec_); + this->template ComputeAndCompareR0(&builder, static_cast(29.5f), {}, + 
this->error_spec_); } -XLA_TEST_F(DotOperationTest, VectorDotF32) { TestVectorDot(); } - -XLA_TEST_F(DotOperationTest, VectorDotF64) { TestVectorDot(); } - -namespace { - std::vector MinorToMajorForIsRowMajor(bool row_major) { return {row_major ? 1 : 0, row_major ? 0 : 1}; } -} // namespace - -XLA_TEST_F(DotOperationTest, Dot_0x2_2x0) { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); - auto rhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, Dot_0x2_2x0) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); + auto rhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR2(&builder, Array2D(0, 0), {}, error_spec_); + this->template ComputeAndCompareR2(&builder, Array2D(0, 0), {}, + this->error_spec_); } -XLA_TEST_F(DotOperationTest, Dot_0x2_2x3) { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); - auto rhs = builder.ConstantR2({{7.0, 8.0, 9.0}, {42.0, 77.0, 101.0}}); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, Dot_0x2_2x3) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); + auto rhs = builder.ConstantR2FromArray2D( + {{7.0f, 8.0f, 9.0f}, {42.0f, 77.0f, 101.0f}}); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR2(&builder, Array2D(0, 3), {}, error_spec_); + this->template ComputeAndCompareR2(&builder, Array2D(0, 3), {}, + this->error_spec_); } -XLA_TEST_F(DotOperationTest, Dot_3x2_2x0) { - ComputationBuilder builder(client_, TestName()); - auto lhs = - builder.ConstantR2({{7.0, 8.0}, {9.0, 42.0}, {77.0, 101.0}}); - auto rhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, Dot_3x2_2x0) { + using T = TypeParam; + 
ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR2FromArray2D( + {{7.0f, 8.0f}, {9.0f, 42.0f}, {77.0f, 101.0f}}); + auto rhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR2(&builder, Array2D(3, 0), {}, error_spec_); + this->template ComputeAndCompareR2(&builder, Array2D(3, 0), {}, + this->error_spec_); } -XLA_TEST_F(DotOperationTest, Dot_2x0_0x2) { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); - auto rhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, Dot_2x0_0x2) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); + auto rhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR2(&builder, Array2D(2, 2, 0.0f), {}, - error_spec_); + this->template ComputeAndCompareR2( + &builder, Array2D(2, 2, static_cast(0.0f)), {}, this->error_spec_); } -XLA_TEST_F(DotOperationTest, FusedDot) { - ComputationBuilder builder(client_, TestName()); - auto param0 = builder.Parameter(0, ShapeUtil::MakeShape(F32, {2, 4}), "arg0"); - auto param1 = builder.Parameter(1, ShapeUtil::MakeShape(F32, {4, 1}), "arg1"); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, FusedDot) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto param0 = + builder.Parameter(0, ShapeUtil::MakeShapeWithType({2, 4}), "arg0"); + auto param1 = + builder.Parameter(1, ShapeUtil::MakeShapeWithType({4, 1}), "arg1"); auto exp0 = builder.Exp(param0); auto result = builder.Dot(exp0, param1); - auto lhs_handle = client_ - ->TransferToServer(*Literal::CreateR2( - {{1.0, 2.0, 3.0, 4.0}, {-1.0, -2.0, -3.0, -4.0}})) - .ConsumeValueOrDie(); - auto rhs_handle = client_ - ->TransferToServer(*Literal::CreateR2( - {{1.0}, {2.0}, {3.0}, {4.0}})) - 
.ConsumeValueOrDie(); - - ComputeAndCompareR2( - &builder, Array2D({{296.14560492846033}, {0.8611737683031964}}), - {lhs_handle.get(), rhs_handle.get()}, error_spec_); -} - -template -void DotOperationTest::TestSquareMatrixDot(bool lhs_row_major, - bool rhs_row_major) { auto lhs_handle = - client_ - ->TransferToServer(*Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, -4.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)))) - .ConsumeValueOrDie(); - auto rhs_handle = - client_ - ->TransferToServer(*Literal::CreateR2WithLayout( - {{1.0, 6.0}, {7.0, -4.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)))) + this->client_ + ->TransferToServer(*Literal::CreateR2FromArray2D( + {{1.0f, 2.0f, 3.0f, 4.0f}, {-1.0f, -2.0f, -3.0f, -4.0f}})) .ConsumeValueOrDie(); + auto rhs_handle = this->client_ + ->TransferToServer(*Literal::CreateR2FromArray2D( + {{1.0f}, {2.0f}, {3.0f}, {4.0f}})) + .ConsumeValueOrDie(); - ComputationBuilder builder(client_, TestName()); - auto prim_type = primitive_util::NativeToPrimitiveType(); - auto result = builder.Dot( - builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}), "lhs"), - builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {2, 2}), "rhs")); + if (std::is_same::value) { + this->error_spec_ = ErrorSpec{0.0001, 1e-3}; + } - Array2D expected({{15.0, -2.0}, {-25.0, 34.0}}); - ComputeAndCompareR2( - &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); + this->template ComputeAndCompareR2( + &builder, Array2D({{296.14560492846033f}, {0.8611737683031964f}}), + {lhs_handle.get(), rhs_handle.get()}, this->error_spec_); } +template +class SquareMatrixDot : public DotOperationTest { + public: + void TestImpl(bool lhs_row_major, bool rhs_row_major) { + auto lhs_handle = + client_ + ->TransferToServer(*Literal::CreateFromArrayWithLayout( + {{1.0f, 2.0f}, {3.0f, -4.0f}}, + LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(lhs_row_major)))) + .ConsumeValueOrDie(); + auto rhs_handle = + 
client_ + ->TransferToServer(*Literal::CreateFromArrayWithLayout( + {{1.0f, 6.0f}, {7.0f, -4.0f}}, + LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(rhs_row_major)))) + .ConsumeValueOrDie(); + ComputationBuilder builder(client_, TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType(); + auto result = builder.Dot( + builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}), "lhs"), + builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {2, 2}), "rhs")); + + Array2D expected({{15.0f, -2.0f}, {-25.0f, 34.0f}}); + ComputeAndCompareR2(&builder, expected, + {lhs_handle.get(), rhs_handle.get()}, error_spec_); + } +}; + +TYPED_TEST_CASE(SquareMatrixDot, TypesF16F32F64CF64); +XLA_TYPED_TEST(SquareMatrixDot, TypesFF) { this->TestImpl(false, false); } +XLA_TYPED_TEST(SquareMatrixDot, TypesFT) { this->TestImpl(false, true); } +XLA_TYPED_TEST(SquareMatrixDot, TypesTF) { this->TestImpl(true, false); } +XLA_TYPED_TEST(SquareMatrixDot, TypesTT) { this->TestImpl(true, true); } + struct DotTestParam { int m; int k; @@ -302,14 +327,13 @@ void ParametricDotTest::TestImpl() { if (param.has_addend) { args.push_back(addend_handle.get()); } - - ComputeAndCompareR2(&builder, *expected, args, ErrorSpec(0.3, 3e-3)); + ErrorSpec error_spec(0.3, 3e-3); + if (std::is_same::value) { + error_spec = ErrorSpec(0.3, 5e-3); + } + ComputeAndCompareR2(&builder, *expected, args, error_spec); } -XLA_TEST_P(ParametricDotTest, TestF32) { TestImpl(); } - -XLA_TEST_P(ParametricDotTest, TestF64) { TestImpl(); } - std::vector CreateDotTestParameters() { std::vector params; @@ -331,6 +355,12 @@ std::vector CreateDotTestParameters() { return params; } +#ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +XLA_TEST_P(ParametricDotTest, TestF16) { TestImpl(); } +#endif +XLA_TEST_P(ParametricDotTest, TestF32) { TestImpl(); } +XLA_TEST_P(ParametricDotTest, TestF64) { TestImpl(); } + INSTANTIATE_TEST_CASE_P(DotTests, ParametricDotTest, ::testing::ValuesIn(CreateDotTestParameters()), PrintDotTestParam); 
@@ -343,14 +373,6 @@ class ParametricDotTestWithoutLayoutAssignment : public ParametricDotTest { } }; -XLA_TEST_P(ParametricDotTestWithoutLayoutAssignment, TestF32) { - TestImpl(); -} - -XLA_TEST_P(ParametricDotTestWithoutLayoutAssignment, TestF64) { - TestImpl(); -} - std::vector CreateNoLayoutAssignmentDotTestParameters() { std::vector params; @@ -407,110 +429,60 @@ std::vector CreateNoLayoutAssignmentDotTestParameters() { return params; } -INSTANTIATE_TEST_CASE_P( - DotTests, ParametricDotTestWithoutLayoutAssignment, - ::testing::ValuesIn(CreateNoLayoutAssignmentDotTestParameters()), - PrintDotTestParam); - -XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) { - TestSquareMatrixDot(false, false); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFT) { - TestSquareMatrixDot(false, true); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTF) { - TestSquareMatrixDot(true, false); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTT) { - TestSquareMatrixDot(true, true); +#ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +XLA_TEST_P(ParametricDotTestWithoutLayoutAssignment, TestF16) { + TestImpl(); } - -XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorFF) { - TestSquareMatrixDot(false, false); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorFT) { - TestSquareMatrixDot(false, true); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorTF) { - TestSquareMatrixDot(true, false); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorTT) { - TestSquareMatrixDot(true, true); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotF64) { - TestSquareMatrixDot(); -} - -template -void DotOperationTest::TestNonsquareMatrixDot(bool lhs_row_major, - bool rhs_row_major) { - auto lhs_handle = - client_ - ->TransferToServer(*Literal::CreateR2WithLayout( - {{1.0, 2.0, 3.0}, {3.0, -4.0, -1.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)))) - .ConsumeValueOrDie(); - 
auto rhs_handle = - client_ - ->TransferToServer(*Literal::CreateR2WithLayout( - {{1.0, 6.0}, {2.0, 3.0}, {7.0, -4.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)))) - .ConsumeValueOrDie(); - - ComputationBuilder builder(client_, TestName()); - auto prim_type = primitive_util::NativeToPrimitiveType(); - auto result = builder.Dot( - builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 3}), "lhs"), - builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {3, 2}), "rhs")); - - Array2D expected({{26.0, 0.0}, {-12.0, 10.0}}); - - ComputeAndCompareR2( - &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); -} - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorFF) { - TestNonsquareMatrixDot(false, false); -} - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorFT) { - TestNonsquareMatrixDot(false, true); -} - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorTF) { - TestNonsquareMatrixDot(true, false); -} - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorTT) { - TestNonsquareMatrixDot(true, true); -} - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF64) { - TestNonsquareMatrixDot(); +#endif +XLA_TEST_P(ParametricDotTestWithoutLayoutAssignment, TestF32) { + TestImpl(); } - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorFF) { - TestNonsquareMatrixDot(false, false); +XLA_TEST_P(ParametricDotTestWithoutLayoutAssignment, TestF64) { + TestImpl(); } -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorFT) { - TestNonsquareMatrixDot(false, true); -} +INSTANTIATE_TEST_CASE_P( + DotTests, ParametricDotTestWithoutLayoutAssignment, + ::testing::ValuesIn(CreateNoLayoutAssignmentDotTestParameters()), + PrintDotTestParam); -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorTF) { - TestNonsquareMatrixDot(true, false); -} +template +class NonsquareMatrixDot : public DotOperationTest { + public: + void TestImpl(bool lhs_row_major, bool rhs_row_major) { + 
auto lhs_handle = + client_ + ->TransferToServer(*Literal::CreateFromArrayWithLayout( + {{1.0f, 2.0f, 3.0f}, {3.0f, -4.0f, -1.0f}}, + LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(lhs_row_major)))) + .ConsumeValueOrDie(); + auto rhs_handle = + client_ + ->TransferToServer(*Literal::CreateFromArrayWithLayout( + {{1.0f, 6.0f}, {2.0f, 3.0f}, {7.0f, -4.0f}}, + LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(rhs_row_major)))) + .ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType(); + auto result = builder.Dot( + builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 3}), "lhs"), + builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {3, 2}), "rhs")); + + Array2D expected({{26.0f, 0.0f}, {-12.0f, 10.0f}}); + + ComputeAndCompareR2(&builder, expected, + {lhs_handle.get(), rhs_handle.get()}, error_spec_); + } +}; -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorTT) { - TestNonsquareMatrixDot(true, true); -} +TYPED_TEST_CASE(NonsquareMatrixDot, TypesF16F32F64CF64); +XLA_TYPED_TEST(NonsquareMatrixDot, TestFF) { this->TestImpl(false, false); } +XLA_TYPED_TEST(NonsquareMatrixDot, TestFT) { this->TestImpl(false, true); } +XLA_TYPED_TEST(NonsquareMatrixDot, TestTF) { this->TestImpl(true, false); } +XLA_TYPED_TEST(NonsquareMatrixDot, TestTT) { this->TestImpl(true, true); } XLA_TEST_F(DotOperationTest, MatrixVectorC64) { auto lhs_handle = @@ -537,25 +509,35 @@ XLA_TEST_F(DotOperationTest, MatrixVectorC64) { &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); } -XLA_TEST_F(DotOperationTest, ConcurrentMatMul) { - ComputationBuilder builder(client_, TestName()); - auto matrix1 = builder.ConstantR2({{1.0, 2.0}, {3.0, 4.0}}); - auto matrix2 = builder.ConstantR2({{5.0, 6.0}, {7.0, 8.0}}); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, ConcurrentMatMult) { + using T = TypeParam; + + ComputationBuilder builder(this->client_, this->TestName()); + auto matrix1 = 
builder.ConstantR2FromArray2D({{1.0f, 2.0f}, {3.0f, 4.0f}}); + auto matrix2 = builder.ConstantR2FromArray2D({{5.0f, 6.0f}, {7.0f, 8.0f}}); auto matrix12 = builder.Dot(matrix1, matrix2); auto matrix21 = builder.Dot(matrix2, matrix1); builder.Add(matrix12, matrix21); - Array2D expected({{42.0, 56.0}, {74.0, 96.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); + Array2D expected({{42.0f, 56.0f}, {74.0f, 96.0f}}); + this->template ComputeAndCompareR2(&builder, expected, {}, + this->error_spec_); } +template +class DotOperationTestForBatchMatMul : public DotOperationTest {}; +TYPED_TEST_CASE(DotOperationTestForBatchMatMul, TypesF16F32F64); + // Regression test for b/32055648. The root of the graph is a kFusion of 4 // bitcasts. Although bitcasts don't map to thunks, the root should still be // sync-dependent on bitcasts' operands. -XLA_TEST_F(DotOperationTest, BatchMatMul) { - ComputationBuilder builder(client_, TestName()); - auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {2, 2, 2, 2}), "x"); - auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {2, 2, 2, 2}), "y"); +XLA_TYPED_TEST(DotOperationTestForBatchMatMul, Types) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto x = + builder.Parameter(0, ShapeUtil::MakeShapeWithType({2, 2, 2, 2}), "x"); + auto y = + builder.Parameter(1, ShapeUtil::MakeShapeWithType({2, 2, 2, 2}), "y"); auto x_flat = builder.Reshape(x, {0, 1, 2, 3}, {4, 2, 2}); auto y_flat = builder.Reshape(y, {0, 1, 2, 3}, {4, 2, 2}); @@ -576,29 +558,42 @@ XLA_TEST_F(DotOperationTest, BatchMatMul) { auto out_flat = builder.ConcatInDim(out_slices, 0); builder.Reshape(out_flat, {0, 1, 2}, {2, 2, 2, 2}); - auto x_data = client_ - ->TransferToServer(*Literal::CreateR4( - {{{{1000, 100}, {10, 1}}, {{2000, 200}, {20, 2}}}, - {{{3000, 300}, {30, 3}}, {{4000, 400}, {40, 4}}}})) - .ConsumeValueOrDie(); - auto y_data = client_ - ->TransferToServer(*Literal::CreateR4( - {{{{1, 2}, {3, 4}}, {{5, 6}, 
{7, 8}}}, - {{{11, 22}, {33, 44}}, {{55, 66}, {77, 88}}}})) + auto x_data = this->client_ + ->TransferToServer(*Literal::CreateR4FromArray4D( + {{{{1000.0f, 100.0f}, {10.0f, 1.0f}}, + {{2000.0f, 200.0f}, {20.0f, 2.0f}}}, + {{{3000.0f, 300.0f}, {30.0f, 3.0f}}, + {{4000.0f, 400.0f}, {40.0f, 4.0f}}}})) .ConsumeValueOrDie(); + auto y_data = + this->client_ + ->TransferToServer(*Literal::CreateR4FromArray4D( + {{{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}, + {{{11.0f, 22.0f}, {33.0f, 44.0f}}, + {{55.0f, 66.0f}, {77.0f, 88.0f}}}})) + .ConsumeValueOrDie(); - ComputeAndCompareR4( + if (std::is_same::value) { + this->error_spec_ = ErrorSpec{0.0001, 1e-3}; + } + this->template ComputeAndCompareR4( &builder, /*expected=*/ - {{{{1300, 2400}, {13, 24}}, {{11400, 13600}, {114, 136}}}, - {{{42900, 79200}, {429, 792}}, {{250800, 299200}, {2508, 2992}}}}, - {x_data.get(), y_data.get()}, error_spec_); + {{{{1300.0f, 2400.0f}, {13.0f, 24.0f}}, + {{11400.0f, 13600.0f}, {114.0f, 136.0f}}}, + {{{42900.0f, 79200.0f}, {429.0f, 792.0f}}, + {{250800.0f, 299200.0f}, {2508.0f, 2992.0f}}}}, + {x_data.get(), y_data.get()}, this->error_spec_); } -XLA_TEST_F(DotOperationTest, GeneralMatMul) { - ComputationBuilder builder(client_, TestName()); - auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {2, 2, 2}), "x"); - auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {2, 2, 2}), "y"); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, GeneralMatMul) { + using T = TypeParam; + + ComputationBuilder builder(this->client_, this->TestName()); + auto x = + builder.Parameter(0, ShapeUtil::MakeShapeWithType({2, 2, 2}), "x"); + auto y = + builder.Parameter(1, ShapeUtil::MakeShapeWithType({2, 2, 2}), "y"); DotDimensionNumbers dnums; dnums.add_lhs_contracting_dimensions(2); @@ -608,31 +603,34 @@ XLA_TEST_F(DotOperationTest, GeneralMatMul) { auto out = builder.DotGeneral(x, y, dnums); - auto x_data = client_ - ->TransferToServer(*Literal::CreateR3( - {{{1.0, 2.0}, {3.0, 4.0}}, {{5.0, 6.0}, 
{7.0, 8.0}}})) - .ConsumeValueOrDie(); + auto x_data = + this->client_ + ->TransferToServer(*Literal::CreateR3FromArray3D( + {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}})) + .ConsumeValueOrDie(); - auto y_data = client_ - ->TransferToServer(*Literal::CreateR3( - {{{1.0, 0.0}, {0.0, 1.0}}, {{1.0, 0.0}, {0.0, 1.0}}})) - .ConsumeValueOrDie(); + auto y_data = + this->client_ + ->TransferToServer(*Literal::CreateR3FromArray3D( + {{{1.0f, 0.0f}, {0.0f, 1.0f}}, {{1.0f, 0.0f}, {0.0f, 1.0f}}})) + .ConsumeValueOrDie(); - ComputeAndCompareR3( + this->template ComputeAndCompareR3( &builder, /*expected=*/ - {{{1.0, 2.0}, {3.0, 4.0}}, {{5.0, 6.0}, {7.0, 8.0}}}, - {x_data.get(), y_data.get()}, error_spec_); + {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}, + {x_data.get(), y_data.get()}, this->error_spec_); } -TEST_F(DotOperationTest, TransposeFolding) { +XLA_TYPED_TEST(DotOperationTest_F16F32F64, TransposeFolding) { + using T = TypeParam; for (bool transpose_lhs : {false, true}) { for (bool transpose_rhs : {false, true}) { for (bool row_major : {false, true}) { - std::unique_ptr> lhs( - new Array2D({{1.0, 2.0, 3.0}, {3.0, -4.0, -1.0}})); - std::unique_ptr> rhs( - new Array2D({{1.0, 6.0}, {2.0, 3.0}, {7.0, -4.0}})); + std::unique_ptr> lhs( + new Array2D({{1.0f, 2.0f, 3.0f}, {3.0f, -4.0f, -1.0f}})); + std::unique_ptr> rhs( + new Array2D({{1.0f, 6.0f}, {2.0f, 3.0f}, {7.0f, -4.0f}})); if (transpose_lhs) { lhs = ReferenceUtil::TransposeArray2D(*lhs); @@ -641,22 +639,20 @@ TEST_F(DotOperationTest, TransposeFolding) { rhs = ReferenceUtil::TransposeArray2D(*rhs); } auto lhs_handle = - client_ - ->TransferToServer( - *Literal::CreateR2FromArray2DWithLayout( - *lhs, LayoutUtil::MakeLayout( - MinorToMajorForIsRowMajor(row_major)))) + this->client_ + ->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( + *lhs, LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(row_major)))) .ConsumeValueOrDie(); auto rhs_handle = - client_ - ->TransferToServer( - 
*Literal::CreateR2FromArray2DWithLayout( - *rhs, LayoutUtil::MakeLayout( - MinorToMajorForIsRowMajor(row_major)))) + this->client_ + ->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( + *rhs, LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(row_major)))) .ConsumeValueOrDie(); - ComputationBuilder builder(client_, TestName()); - auto prim_type = primitive_util::NativeToPrimitiveType(); + ComputationBuilder builder(this->client_, this->TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType(); auto lhs_arg = builder.Parameter( 0, ShapeUtil::MakeShape(prim_type, {lhs->height(), lhs->width()}), "lhs"); @@ -671,24 +667,27 @@ TEST_F(DotOperationTest, TransposeFolding) { } auto result = builder.Dot(lhs_arg, rhs_arg); - Array2D expected({{26.0, 0.0}, {-12.0, 10.0}}); + Array2D expected({{26.0f, 0.0f}, {-12.0f, 10.0f}}); VLOG(1) << "TestTransposeFolding " << transpose_lhs << " " << transpose_rhs << " " << row_major; - ComputeAndCompareR2(&builder, expected, - {lhs_handle.get(), rhs_handle.get()}, - error_spec_); + this->template ComputeAndCompareR2( + &builder, expected, {lhs_handle.get(), rhs_handle.get()}, + this->error_spec_); } } } } -TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstLHS) { - auto prim_type = primitive_util::NativeToPrimitiveType(); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, + DotOfConcatOptimizationWithConstLHS) { + using T = TypeParam; + auto prim_type = primitive_util::NativeToPrimitiveType(); - std::unique_ptr> constant_lhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}, + {6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}})); - ComputationBuilder builder(client_, TestName()); + ComputationBuilder builder(this->client_, this->TestName()); auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); auto rhs_arg_0 = builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}), 
"rhs_arg_0"); @@ -699,78 +698,80 @@ TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstLHS) { auto result = builder.Dot( lhs_constant, builder.ConcatInDim({rhs_arg_0, rhs_arg_1, rhs_arg_2}, 0)); - std::unique_ptr> arg_0_value_array( - new Array2D({{1.0, 2.0}, {3.0, 4.0}})); - std::unique_ptr> arg_1_value_array( - new Array2D({{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}})); - std::unique_ptr> arg_2_value_array( - new Array2D({{1.0, 2.0}})); + std::unique_ptr> arg_0_value_array( + new Array2D({{1.0f, 2.0f}, {3.0f, 4.0f}})); + std::unique_ptr> arg_1_value_array( + new Array2D({{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}})); + std::unique_ptr> arg_2_value_array(new Array2D({{1.0f, 2.0f}})); TF_ASSERT_OK_AND_ASSIGN( auto arg_0_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_0_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_0_value_array))); TF_ASSERT_OK_AND_ASSIGN( auto arg_1_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_1_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_1_value_array))); TF_ASSERT_OK_AND_ASSIGN( auto arg_2_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_2_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_2_value_array))); - Array2D expected({{53.0, 74.0}, {45.0, 66.0}}); - ComputeAndCompareR2( + Array2D expected({{53.0f, 74.0f}, {45.0f, 66.0f}}); + this->template ComputeAndCompareR2( &builder, expected, - {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_); -} - -TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstRHS) { - auto prim_type = primitive_util::NativeToPrimitiveType(); - - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0}, - {3.0, 4.0}, - {5.0, 6.0}, - {6.0, 5.0}, - {4.0, 3.0}, - {2.0, 1.0}})); - - ComputationBuilder builder(client_, TestName()); + {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, + 
this->error_spec_); +} + +XLA_TYPED_TEST(DotOperationTest_F16F32F64, + DotOfConcatOptimizationWithConstRHS) { + using T = TypeParam; + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0f, 2.0f}, + {3.0f, 4.0f}, + {5.0f, 6.0f}, + {6.0f, 5.0f}, + {4.0f, 3.0f}, + {2.0f, 1.0f}})); + + ComputationBuilder builder(this->client_, this->TestName()); auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto lhs_arg_0 = builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}), + auto lhs_arg_0 = builder.Parameter(0, ShapeUtil::MakeShapeWithType({2, 2}), "lhs_arg_0"); - auto lhs_arg_1 = builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {2, 3}), + auto lhs_arg_1 = builder.Parameter(1, ShapeUtil::MakeShapeWithType({2, 3}), "lhs_arg_1"); - auto lhs_arg_2 = builder.Parameter(2, ShapeUtil::MakeShape(prim_type, {2, 1}), + auto lhs_arg_2 = builder.Parameter(2, ShapeUtil::MakeShapeWithType({2, 1}), "lhs_arg_2"); auto result = builder.Dot( builder.ConcatInDim({lhs_arg_0, lhs_arg_1, lhs_arg_2}, 1), rhs_constant); - std::unique_ptr> arg_0_value_array( - new Array2D({{1.0, 2.0}, {3.0, 4.0}})); - std::unique_ptr> arg_1_value_array( - new Array2D({{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}})); - std::unique_ptr> arg_2_value_array( - new Array2D({{1.0}, {2.0}})); + std::unique_ptr> arg_0_value_array( + new Array2D({{1.0f, 2.0f}, {3.0f, 4.0f}})); + std::unique_ptr> arg_1_value_array( + new Array2D({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}})); + std::unique_ptr> arg_2_value_array( + new Array2D({{1.0f}, {2.0f}})); TF_ASSERT_OK_AND_ASSIGN( auto arg_0_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_0_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_0_value_array))); TF_ASSERT_OK_AND_ASSIGN( auto arg_1_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_1_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_1_value_array))); TF_ASSERT_OK_AND_ASSIGN( auto 
arg_2_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_2_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_2_value_array))); - Array2D expected({{38.0, 36.0}, {93.0, 91.0}}); - ComputeAndCompareR2( + Array2D expected({{38.0f, 36.0f}, {93.0f, 91.0f}}); + this->template ComputeAndCompareR2( &builder, expected, - {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_); + {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, + this->error_spec_); } + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc b/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc index 6c86dd5b9e..c42f71388b 100644 --- a/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc +++ b/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc @@ -29,6 +29,8 @@ limitations under the License. #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" @@ -38,258 +40,223 @@ limitations under the License. 
namespace xla { namespace { -class MatOpsSimpleTest : public ClientLibraryTestBase { - protected: - Computation BuildSum() { - // sum(x, y) = x + y - ComputationBuilder builder(client_, "sum"); - auto x_value = - builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x_value"); - auto y_value = - builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y_value"); - builder.Add(x_value, y_value); - auto computation_status = builder.Build(); - TF_CHECK_OK(computation_status.status()); - return computation_status.ConsumeValueOrDie(); - } - - void TestLinspaceMax(int64 rows, int64 cols) { - float from = -128.0, to = 256.0; - std::unique_ptr> alhs = - MakeLinspaceArray2D(from, to, rows, cols); - auto arhs = MakeUnique>(rows, cols, 1.0); - - ComputationBuilder builder( - client_, - tensorflow::strings::Printf("max_%lldx%lld_linspace", rows, cols)); - auto lhs = builder.ConstantR2FromArray2D(*alhs); - auto rhs = builder.ConstantR2FromArray2D(*arhs); - auto max = builder.Max(lhs, rhs); - - Array2D aexpected(rows, cols); - for (int row = 0; row < rows; ++row) { - for (int col = 0; col < cols; ++col) { - aexpected(row, col) = std::max((*alhs)(row, col), (*arhs)(row, col)); - } - } - - ComputeAndCompareR2(&builder, aexpected, {}, ErrorSpec(1e-6)); - } -}; - -TEST_F(MatOpsSimpleTest, ExpTwoByTwoValues) { - ComputationBuilder builder(client_, "exp_2x2"); - auto data = builder.ConstantR2({ - {1.0, 0.0}, // row 0 - {-1.0, 0.5}, // row 1 +#ifdef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +using TypesF16F32 = ::testing::Types; +#else +using TypesF16F32 = ::testing::Types; +#endif + +class MatOpsSimpleTest : public ClientLibraryTestBase {}; + +template +class MatOpsSimpleTest_F16F32 : public MatOpsSimpleTest {}; + +// TODO(bixia): This test for F16 failed on GPU 02-25-2018. 
+#ifdef XLA_TEST_BACKEND_GPU +TYPED_TEST_CASE(MatOpsSimpleTest_F16F32, ::testing::Types); +#else +TYPED_TEST_CASE(MatOpsSimpleTest_F16F32, TypesF16F32); +#endif + +XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, ExpTwoByTwoValues) { + using T = TypeParam; + ComputationBuilder builder(this->client_, "exp_2x2"); + auto data = builder.ConstantR2FromArray2D({ + {1.0f, 0.0f}, // row 0 + {-1.0f, 0.5f}, // row 1 }); builder.Exp(data); std::unique_ptr expected = - Literal::CreateR2({{2.71828, 1.00000}, // row 0 - {0.36788, 1.64872}}); // row 1 + Literal::CreateR2FromArray2D({{2.71828f, 1.00000f}, // row 0 + {0.36788f, 1.64872f}}); // row 1 - ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-5)); + this->template ComputeAndCompareLiteral(&builder, *expected, {}, + ErrorSpec(1e-5)); } -TEST_F(MatOpsSimpleTest, MapTwoByTwo) { +XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, MapTwoByTwo) { + using T = TypeParam; Computation add_half; { // add_half(x) = x + 0.5 - ComputationBuilder builder(client_, "add_half"); + ComputationBuilder builder(this->client_, "add_half"); auto x_value = - builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x_value"); - auto half = builder.ConstantR0(0.5); + builder.Parameter(0, ShapeUtil::MakeShapeWithType({}), "x_value"); + auto half = builder.ConstantR0(static_cast(0.5)); builder.Add(x_value, half); auto computation_status = builder.Build(); ASSERT_IS_OK(computation_status.status()); add_half = computation_status.ConsumeValueOrDie(); } - ComputationBuilder builder(client_, "map_2x2"); - auto data = builder.ConstantR2({ - {1.0, 0.0}, // row 0 - {-1.0, 0.5}, // row 1 + ComputationBuilder builder(this->client_, "map_2x2"); + auto data = builder.ConstantR2FromArray2D({ + {1.0f, 0.0f}, // row 0 + {-1.0f, 0.5f}, // row 1 }); auto map = builder.Map({data}, add_half, {0, 1}); std::unique_ptr expected = - Literal::CreateR2({{1.5, 0.5}, // row 0 - {-0.5, 1.0}}); // row 1 - ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-5)); + 
Literal::CreateR2FromArray2D({{1.5f, 0.5f}, // row 0 + {-0.5f, 1.0f}}); // row 1 + this->template ComputeAndCompareLiteral(&builder, *expected, {}, + ErrorSpec(1e-5)); } -TEST_F(MatOpsSimpleTest, MaxTwoByTwoValues) { - ComputationBuilder builder(client_, "max_2x2"); - auto lhs = builder.ConstantR2({ - {7.0, 2.0}, // row 0 - {3.0, -4.0}, // row 1 +XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, MaxTwoByTwoValues) { + using T = TypeParam; + ComputationBuilder builder(this->client_, "max_2x2"); + auto lhs = builder.ConstantR2FromArray2D({ + {7.0f, 2.0f}, // row 0 + {3.0f, -4.0f}, // row 1 }); - auto rhs = builder.ConstantR2({ - {5.0, 6.0}, // row 0 - {1.0, -8.0}, // row 1 + auto rhs = builder.ConstantR2FromArray2D({ + {5.0f, 6.0f}, // row 0 + {1.0f, -8.0f}, // row 1 }); auto max = builder.Max(lhs, rhs); std::unique_ptr expected = - Literal::CreateR2({{7.0, 6.0}, // row 0 - {3.0, -4.0}}); // row 1 - ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-6)); + Literal::CreateR2FromArray2D({{7.0f, 6.0f}, // row 0 + {3.0f, -4.0f}}); // row 1 + this->template ComputeAndCompareLiteral(&builder, *expected, {}, + ErrorSpec(1e-6)); } -TEST_F(MatOpsSimpleTest, Max1x1Linspace) { TestLinspaceMax(1, 1); } - -TEST_F(MatOpsSimpleTest, Max2x2Linspace) { TestLinspaceMax(2, 2); } - -TEST_F(MatOpsSimpleTest, Max3x3Linspace) { TestLinspaceMax(3, 3); } - -TEST_F(MatOpsSimpleTest, Max4x4Linspace) { TestLinspaceMax(4, 4); } - -TEST_F(MatOpsSimpleTest, Max6x6Linspace) { TestLinspaceMax(6, 6); } - -TEST_F(MatOpsSimpleTest, Max8x8Linspace) { TestLinspaceMax(8, 8); } - -TEST_F(MatOpsSimpleTest, Max12x12Linspace) { TestLinspaceMax(12, 12); } - -TEST_F(MatOpsSimpleTest, Max16x16Linspace) { TestLinspaceMax(16, 16); } +struct TestLinspaceMaxParam { + int64 rows; + int64 cols; +}; -TEST_F(MatOpsSimpleTest, Max32x8Linspace) { TestLinspaceMax(32, 8); } +class TestLinspaceMaxParametric + : public MatOpsSimpleTest, + public ::testing::WithParamInterface { + public: + template + void TestImpl() { + 
TestLinspaceMaxParam param = GetParam(); + int64 rows = param.rows; + int64 cols = param.cols; + float from = -128.0, to = 256.0; + std::unique_ptr> alhs = + MakeLinspaceArray2D(from, to, rows, cols); + auto arhs = MakeUnique>(rows, cols, static_cast(1.0f)); -TEST_F(MatOpsSimpleTest, Max64x8Linspace) { TestLinspaceMax(64, 8); } + ComputationBuilder builder( + client_, + tensorflow::strings::Printf("max_%lldx%lld_linspace", rows, cols)); + auto lhs = builder.ConstantR2FromArray2D(*alhs); + auto rhs = builder.ConstantR2FromArray2D(*arhs); + auto max = builder.Max(lhs, rhs); -class MatOpsDotAddTest - : public ClientLibraryTestBase, - public ::testing::WithParamInterface> {}; - -TEST_P(MatOpsDotAddTest, Dot_Add_2x2_2x2) { - bool row_major = std::get<0>(GetParam()); - bool add_lhs = std::get<1>(GetParam()); - bool transpose = std::get<2>(GetParam()); - Array2D lhs({{1.0, 2.0}, {3.0, 4.0}}); - Array2D rhs({{10.0, 11.0}, {12.0, 13.0}}); - - auto minor_to_major = [](bool row_major) -> std::vector { - return {row_major ? 1 : 0, row_major ? 
0 : 1}; - }; - - auto prim_type = primitive_util::NativeToPrimitiveType(); - Shape lhs_shape = - ShapeUtil::MakeShape(prim_type, {lhs.height(), lhs.width()}); - Shape rhs_shape = - ShapeUtil::MakeShape(prim_type, {rhs.height(), rhs.width()}); - - TF_ASSERT_OK_AND_ASSIGN( - auto lhs_handle, - client_->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( - lhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); - TF_ASSERT_OK_AND_ASSIGN( - auto rhs_handle, - client_->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( - rhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); - - ComputationBuilder builder(client_, TestName()); - auto lhs_arg = builder.Parameter(0, lhs_shape, "lhs"); - auto lhs_mat_arg = lhs_arg; - if (transpose) { - lhs_mat_arg = builder.Transpose(lhs_mat_arg, {1, 0}); - } - auto rhs_arg = builder.Parameter(1, rhs_shape, "rhs"); - auto result = builder.Dot(lhs_mat_arg, rhs_arg); - Array2D expected; - if (add_lhs) { - result = builder.Add(result, lhs_arg); - if (transpose) { - expected = Array2D({{47, 52}, {71, 78}}); - } else { - expected = Array2D({{35, 39}, {81, 89}}); + Array2D expected(rows, cols); + for (int row = 0; row < rows; ++row) { + for (int col = 0; col < cols; ++col) { + expected(row, col) = std::max((*alhs)(row, col), (*arhs)(row, col)); + } } - } else { - result = builder.Add(result, rhs_arg); - if (transpose) { - expected = Array2D({{56, 61}, {80, 87}}); - } else { - expected = Array2D({{44, 48}, {90, 98}}); + ErrorSpec error_spec(1e-6); + if (std::is_same::value) { + error_spec = ErrorSpec(1e-6, 2e-4); } + ComputeAndCompareR2(&builder, expected, {}, error_spec); } +}; - ComputeAndCompareR2(&builder, expected, - {lhs_handle.get(), rhs_handle.get()}, - ErrorSpec(1e-6)); +string PrintTestLinspaceMaxParam( + const ::testing::TestParamInfo& test_param) { + const TestLinspaceMaxParam& param = test_param.param; + return tensorflow::strings::StrCat(param.rows, "r", param.cols, "c"); } 
-INSTANTIATE_TEST_CASE_P(MatOpsDotAddTestInstances, MatOpsDotAddTest, - ::testing::Combine(::testing::Bool(), ::testing::Bool(), - ::testing::Bool())); +#ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +// TODO(bixia): This test failed on GPU 02-25-2018 +#ifdef XLA_TEST_BACKEND_CPU +XLA_TEST_P(TestLinspaceMaxParametric, TestF16) { TestImpl(); } +#endif +#endif +XLA_TEST_P(TestLinspaceMaxParametric, TestF32) { TestImpl(); } + +INSTANTIATE_TEST_CASE_P( + TestLinspaceMax, TestLinspaceMaxParametric, + ::testing::Values(TestLinspaceMaxParam{1, 1}, TestLinspaceMaxParam{2, 2}, + TestLinspaceMaxParam{3, 3}, TestLinspaceMaxParam{4, 4}, + TestLinspaceMaxParam{6, 6}, TestLinspaceMaxParam{8, 8}, + TestLinspaceMaxParam{12, 12}, + TestLinspaceMaxParam{16, 16}, TestLinspaceMaxParam{32, 8}, + TestLinspaceMaxParam{64, 8}), + PrintTestLinspaceMaxParam); -class MatOpsDotAddTest_bf16 +class MatOpsDotAddTest : public ClientLibraryTestBase, - public ::testing::WithParamInterface> {}; - -TEST_P(MatOpsDotAddTest_bf16, Dot_Add_2x2_2x2) { - bool row_major = std::get<0>(GetParam()); - bool add_lhs = std::get<1>(GetParam()); - bool transpose = std::get<2>(GetParam()); - Array2D lhs( - {{bfloat16(1.0f), bfloat16(2.0f)}, {bfloat16(3.0), bfloat16(4.0)}}); - Array2D rhs( - {{bfloat16(10.0f), bfloat16(11.0f)}, {bfloat16(12.0f), bfloat16(13.0f)}}); - - auto minor_to_major = [](bool row_major) -> std::vector { - return {row_major ? 1 : 0, row_major ? 
0 : 1}; - }; - - auto prim_type = primitive_util::NativeToPrimitiveType(); - Shape lhs_shape = - ShapeUtil::MakeShape(prim_type, {lhs.height(), lhs.width()}); - Shape rhs_shape = - ShapeUtil::MakeShape(prim_type, {rhs.height(), rhs.width()}); - - TF_ASSERT_OK_AND_ASSIGN( - auto lhs_handle, - client_->TransferToServer( - *Literal::CreateR2FromArray2DWithLayout( - lhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); - TF_ASSERT_OK_AND_ASSIGN( - auto rhs_handle, - client_->TransferToServer( - *Literal::CreateR2FromArray2DWithLayout( - rhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); - - ComputationBuilder builder(client_, TestName()); - auto lhs_arg = builder.Parameter(0, lhs_shape, "lhs"); - auto lhs_mat_arg = lhs_arg; - if (transpose) { - lhs_mat_arg = builder.Transpose(lhs_mat_arg, {1, 0}); - } - auto rhs_arg = builder.Parameter(1, rhs_shape, "rhs"); - auto result = builder.Dot(lhs_mat_arg, rhs_arg); - Array2D expected; - if (add_lhs) { - result = builder.Add(result, lhs_arg); + public ::testing::WithParamInterface> { + public: + template + void TestImpl() { + bool row_major = std::get<0>(GetParam()); + bool add_lhs = std::get<1>(GetParam()); + bool transpose = std::get<2>(GetParam()); + Array2D lhs({{1.0f, 2.0f}, {3.0f, 4.0f}}); + Array2D rhs({{10.0f, 11.0f}, {12.0f, 13.0f}}); + + auto minor_to_major = [](bool row_major) -> std::vector { + return {row_major ? 1 : 0, row_major ? 
0 : 1}; + }; + + auto prim_type = primitive_util::NativeToPrimitiveType(); + Shape lhs_shape = + ShapeUtil::MakeShape(prim_type, {lhs.height(), lhs.width()}); + Shape rhs_shape = + ShapeUtil::MakeShape(prim_type, {rhs.height(), rhs.width()}); + + TF_ASSERT_OK_AND_ASSIGN( + auto lhs_handle, + client_->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( + lhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); + TF_ASSERT_OK_AND_ASSIGN( + auto rhs_handle, + client_->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( + rhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); + + ComputationBuilder builder(client_, TestName()); + auto lhs_arg = builder.Parameter(0, lhs_shape, "lhs"); + auto lhs_mat_arg = lhs_arg; if (transpose) { - expected = Array2D( - {{bfloat16(47), bfloat16(52)}, {bfloat16(71), bfloat16(78)}}); - } else { - expected = Array2D( - {{bfloat16(35), bfloat16(39)}, {bfloat16(81), bfloat16(89)}}); + lhs_mat_arg = builder.Transpose(lhs_mat_arg, {1, 0}); } - } else { - result = builder.Add(result, rhs_arg); - if (transpose) { - expected = Array2D( - {{bfloat16(56), bfloat16(61)}, {bfloat16(80), bfloat16(87)}}); + auto rhs_arg = builder.Parameter(1, rhs_shape, "rhs"); + auto result = builder.Dot(lhs_mat_arg, rhs_arg); + Array2D expected; + if (add_lhs) { + result = builder.Add(result, lhs_arg); + if (transpose) { + expected = Array2D({{47.0f, 52.0f}, {71.0f, 78.0f}}); + } else { + expected = Array2D({{35.0f, 39.0f}, {81.0f, 89.0f}}); + } } else { - expected = Array2D( - {{bfloat16(44), bfloat16(48)}, {bfloat16(90), bfloat16(98)}}); + result = builder.Add(result, rhs_arg); + if (transpose) { + expected = Array2D({{56.0f, 61.0f}, {80.0f, 87.0f}}); + } else { + expected = Array2D({{44.0f, 48.0f}, {90.0f, 98.0f}}); + } } + + ComputeAndCompareR2(&builder, expected, + {lhs_handle.get(), rhs_handle.get()}, + ErrorSpec(1e-6)); } +}; - ComputeAndCompareR2(&builder, expected, - {lhs_handle.get(), rhs_handle.get()}, - ErrorSpec(1e-6)); -} 
+XLA_TEST_P(MatOpsDotAddTest, Dot_Add_2x2_2x2BF16) { TestImpl(); } +#ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +XLA_TEST_P(MatOpsDotAddTest, Dot_Add_2x2_2x2F16) { TestImpl(); } +#endif +XLA_TEST_P(MatOpsDotAddTest, Dot_Add_2x2_2x2F32) { TestImpl(); } -INSTANTIATE_TEST_CASE_P(MatOpsDotAddTestInstances, MatOpsDotAddTest_bf16, +INSTANTIATE_TEST_CASE_P(MatOpsDotAddTestInstances, MatOpsDotAddTest, ::testing::Combine(::testing::Bool(), ::testing::Bool(), ::testing::Bool())); diff --git a/tensorflow/stream_executor/blas.cc b/tensorflow/stream_executor/blas.cc index da09d84921..31724cf6c9 100644 --- a/tensorflow/stream_executor/blas.cc +++ b/tensorflow/stream_executor/blas.cc @@ -79,6 +79,8 @@ string ComputationTypeString(ComputationType ty) { return "f32"; case ComputationType::kF64: return "f64"; + case ComputationType::kI32: + return "i32"; case ComputationType::kComplexF32: return "complex f32"; case ComputationType::kComplexF64: @@ -88,6 +90,10 @@ string ComputationTypeString(ComputationType ty) { } } +std::ostream& operator<<(std::ostream& os, ComputationType ty) { + return os << ComputationTypeString(ty); +} + } // namespace blas } // namespace gputools } // namespace perftools diff --git a/tensorflow/stream_executor/blas.h b/tensorflow/stream_executor/blas.h index 072f085546..c5f778a5c7 100644 --- a/tensorflow/stream_executor/blas.h +++ b/tensorflow/stream_executor/blas.h @@ -104,6 +104,8 @@ enum class ComputationType { // Converts a ComputationType to a string. string ComputationTypeString(ComputationType ty); +std::ostream &operator<<(std::ostream &os, ComputationType ty); + // Opaque identifier for an "algorithm" used by a blas routine. This functions // as a hint to the blas library. 
typedef int64 AlgorithmType; diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc index 44a3a745ad..c563f8f931 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.cc +++ b/tensorflow/stream_executor/cuda/cuda_blas.cc @@ -13,17 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Include cuBLAS headers early, and then set EIGEN_HAS_CUDA_FP16 -// if we have new enough CUDA (which we will only know after including -// cuda.h). This ensures that Eigen's Half.h does not attempt to make its own -// __half typedef if CUDA has already defined one (and conversely, that we do -// not include after Half.h has made its typedef). -#include "cuda/include/cuda.h" #include "cuda/include/cublas_v2.h" - -#if CUDA_VERSION >= 7050 -#define EIGEN_HAS_CUDA_FP16 -#endif +#include "cuda/include/cuda.h" #if CUDA_VERSION >= 8000 #define SE_CUDA_DATA_HALF CUDA_R_16F @@ -33,6 +24,34 @@ limitations under the License. #include "tensorflow/stream_executor/cuda/cuda_blas.h" +// Both Eigen Half.h and CUDA cuda_fp16.h provide similar typedef for __half. As +// such, there are two ways to get the typedef for __half: +// +// (1) Includes cuda_fp16.h and defines EIGEN_HAS_CUDA_FP16. +// (2) Neither includes cuda_fp16.h nor defines EIGEN_HAS_CUDA_FP16. +// +// Due to issue b/73793421, when the first approach is used and NVCC is used to +// compile this file, NVCC will complain duplicated definition for +// EIGEN_HAS_CUDA_FP16. On the other hand, when the second approach is used and +// clang is used to compile this file, clang will not understand __half +// due to missing the definition and macro EIGEN_HAS_CUDA_FP16. +// +// Because this file may be compiled with CLANG but will never be compiled with +// NVCC, we choose the first approach for CUDA < 9.0. 
For CUDA >= 9.0, we have +// to use the second approach because the data member in the __half defined +// by CUDA > 9.0 is `__x` while Eigen expects it to be `x`. +// +// TODO(b/73793421): Remove the following code block to switch to the second +// approach when the issue is fixed. +#if CUDA_VERSION < 9000 +#include "cuda/include/cuda_fp16.h" +#if CUDA_VERSION >= 7050 +#define EIGEN_HAS_CUDA_FP16 +#endif +#endif + +#include "third_party/eigen3/Eigen/Core" + #include #include @@ -2256,6 +2275,14 @@ bool CUDABlas::DoBlasGemmWithAlgorithm( DeviceMemory *c, int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { + if (computation_type == blas::ComputationType::kF32) { + return DoBlasGemmWithAlgorithmImpl( + stream, transa, transb, m, n, k, static_cast(alpha), a, lda, b, + ldb, static_cast(beta), c, ldc, computation_type, algorithm, + output_profile_result); + } + + CHECK_EQ(computation_type, blas::ComputationType::kF16); return DoBlasGemmWithAlgorithmImpl( stream, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, computation_type, algorithm, output_profile_result); -- GitLab From 757a71e886fb9328b19b0ba15658e49cfa7cc323 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Wed, 28 Feb 2018 13:00:30 -0800 Subject: [PATCH 160/311] Lift ops to the global graph if all graphs are building functions This change ensures that, when all graphs are building functions, `init_scope` lifts ops into the global graph. 
PiperOrigin-RevId: 187370367 --- tensorflow/python/framework/ops.py | 60 +++++++++++++++---------- tensorflow/python/framework/ops_test.py | 31 +++++++++---- 2 files changed, 59 insertions(+), 32 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index b0d2704c07..735ba316d0 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5103,38 +5103,50 @@ def init_scope(): """ # pylint: enable=g-doc-return-or-yield,line-too-long - in_graph_mode = context.in_graph_mode() - # Retrieve the active name scope: entering an `init_scope` preserves - # the name scope of the current context. - if in_graph_mode: + if context.in_eager_mode(): + # Fastpath. + with tape.stop_recording(): + yield + else: + # Retrieve the active name scope: entering an `init_scope` preserves + # the name scope of the current context. default_graph = get_default_graph() scope = default_graph.get_name_scope() - else: - scope = context.context().scope_name - if scope and scope[-1] != '/': - # Names that end with trailing slashes are treated by `name_scope` as - # absolute. - scope = scope + '/' - - outer_context = None - if in_graph_mode and not _default_graph_stack.stack: - outer_context = default_graph.as_default - else: - for stack_entry in reversed(context.context_stack.stack): - if not stack_entry.is_building_function: - outer_context = stack_entry.enter_context_fn - break + if scope and scope[-1] != '/': + # Names that end with trailing slashes are treated by `name_scope` as + # absolute. + scope = scope + '/' + + outer_context = None + if not _default_graph_stack.stack: + # If the default graph stack is empty, then we cannot be building a + # function. Install the global graph (which, in this case, is also the + # default graph) as the outer context. 
+ if default_graph.building_function: + raise RuntimeError("The global graph is building a function.") + outer_context = default_graph.as_default + else: + # Find a context that is not building a function. + for stack_entry in reversed(context.context_stack.stack): + if not stack_entry.is_building_function: + outer_context = stack_entry.enter_context_fn + break - if outer_context is None: - raise AssertionError("All graphs are building functions, and no " + if outer_context is None: + # As a last resort, obtain the global default graph; this graph doesn't + # necessarily live on the graph stack (and hence it doesn't necessarily + # live on the context stack), but it is stored in the graph stack's + # encapsulating object. + outer_context = _default_graph_stack._GetGlobalDefaultGraph().as_default # pylint: disable=protected-access + + if outer_context is None: + # Sanity check; this shouldn't be triggered. + raise RuntimeError("All graphs are building functions, and no " "eager context was previously active.") - try: with outer_context(), name_scope(scope), control_dependencies( None), tape.stop_recording(): yield - finally: - pass def enable_eager_execution(config=None, device_policy=None): diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index a141fe6340..1f2dfb8d43 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -2156,14 +2156,6 @@ class InitScopeTest(test_util.TensorFlowTestCase): self.assertIs(g, ops.get_default_graph()) self.assertTrue(context.in_graph_mode()) - def testAllGraphsBuildingFunctionsRaisesError(self): - g = ops.Graph() - g._building_function = True # pylint: disable=protected-access - with g.as_default(): - with self.assertRaises(AssertionError): - with ops.init_scope(): - pass - def testStaysInEagerWhenOnlyEagerContextActive(self): with context.eager_mode(): with ops.init_scope(): @@ -2241,6 +2233,29 @@ class 
InitScopeTest(test_util.TensorFlowTestCase): self.assertEqual(4, int(compiled_outer(inner=compiled_inner))) self.assertEqual(7, int(compiled_outer(inner=compiled_inner))) + def testFallsBackToGlobalGraphWhenAllGraphsAreBuildingFunctions(self): + with context.graph_mode(): + ops.reset_default_graph() + # This doesn't push anything onto the graph stack, but it does + # set the stack's global graph. + global_graph = ops.get_default_graph() + fn_graph = ops.Graph() + + # pylint: disable=protected-access + fn_graph._building_function = True + self.assertEqual(len(ops._default_graph_stack.stack), 0) + with fn_graph.as_default(): + self.assertEqual(len(ops._default_graph_stack.stack), 1) + with ops.init_scope(): + self.assertGreater(len(ops._default_graph_stack.stack), 1) + dummy = constant_op.constant(1.0) + self.assertEqual(len(ops._default_graph_stack.stack), 1) + # Note that the global graph is _not_ on the graph stack. + self.assertEqual(len(ops._default_graph_stack.stack), 0) + # Ensure that `dummy` was added to the global graph. + self.assertEqual(global_graph, dummy.graph) + # pylint: enable=protected-access + def testInstallsDefaultGraphWhenGraphStackIsEmptyInGraphMode(self): with context.graph_mode(): # pylint: disable=protected-access -- GitLab From 69f674b473470b44c6a1ca1bbb3bcc6a8c53074b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 13:02:07 -0800 Subject: [PATCH 161/311] Factor out the LstmBatchStep for the various LSTM Ops. 
PiperOrigin-RevId: 187370622 --- .../kernels/bidirectional_sequence_lstm.cc | 183 ++---------------- .../lite/kernels/internal/kernel_utils.cc | 147 ++++++++++++++ .../lite/kernels/internal/kernel_utils.h | 36 ++++ tensorflow/contrib/lite/kernels/lstm.cc | 170 +++++----------- .../kernels/unidirectional_sequence_lstm.cc | 179 +++++------------ 5 files changed, 294 insertions(+), 421 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc index 8d70df5e21..a64ac42bc4 100644 --- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h" #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" #include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/kernels/op_macros.h" @@ -443,166 +444,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -// Performs an LSTM batch inference step for input specified by input_ptr_batch. -// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and -// biases (*_bias_ptr), and buffers (*_scratch), along with additional -// parameters: -// - params: various LSTM params including activation, clipping, etc., -// - use_cifg: use coupled input forget gates, -// - use_peephole: whether to use peephole connection or not, -// - n_batch: size of batch, -// - n_cell: number of cells (or units), -// - n_input: the input size, -// - n_output: the output size. -// -// The pointers to the hidden state and the output are updated as a result. 
-// -// The pointers with the suffix "_batch" point to data aligned in batch_major -// order, and each step processes batch_size many inputs from input_ptr_batch, -// and updates batch_size many outputs and hidden states. -void LstmBatchStep( - const float* input_ptr_batch, const float* input_to_input_weights_ptr, - const float* input_to_forget_weights_ptr, - const float* input_to_cell_weights_ptr, - const float* input_to_output_weights_ptr, - const float* recurrent_to_input_weights_ptr, - const float* recurrent_to_forget_weights_ptr, - const float* recurrent_to_cell_weights_ptr, - const float* recurrent_to_output_weights_ptr, - const float* cell_to_input_weights_ptr, - const float* cell_to_forget_weights_ptr, - const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr, - const float* forget_gate_bias_ptr, const float* cell_bias_ptr, - const float* output_gate_bias_ptr, const float* projection_weights_ptr, - const float* projection_bias_ptr, const TfLiteLSTMParams* params, - bool use_cifg, bool use_peephole, int n_batch, int n_cell, int n_input, - int n_output, float* output_state_ptr, float* cell_state_ptr, - float* input_gate_scratch, float* forget_gate_scratch, float* cell_scratch, - float* output_gate_scratch, float* output_ptr_time) { - // Initialize scratch buffers with bias. - if (!use_cifg) { - tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch, - input_gate_scratch); - } - tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch, - forget_gate_scratch); - tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch, - cell_scratch); - tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch, - output_gate_scratch); - - // For each batch and cell: compute input_weight * input. 
- if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_input_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, - input_gate_scratch, /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_forget_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, - forget_gate_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_cell_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, - cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_output_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, - output_gate_scratch, /*result_stride=*/1); - - // For each batch and cell: compute recurrent_weight * output_state. - if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_input_weights_ptr, n_cell, n_output, output_state_ptr, - n_batch, input_gate_scratch, - /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_forget_weights_ptr, n_cell, n_output, output_state_ptr, - n_batch, forget_gate_scratch, - /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_cell_weights_ptr, n_cell, n_output, output_state_ptr, - n_batch, cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_output_weights_ptr, n_cell, n_output, output_state_ptr, - n_batch, output_gate_scratch, - /*result_stride=*/1); - - // For each batch and cell: update input gate. - if (!use_cifg) { - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_input_weights_ptr, n_cell, cell_state_ptr, n_batch, - input_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, - input_gate_scratch); - } - - // For each batch and cell: update forget gate. 
- if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_forget_weights_ptr, n_cell, cell_state_ptr, n_batch, - forget_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, - forget_gate_scratch); - - // For each batch and cell: update the cell. - tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr, - n_batch * n_cell, cell_state_ptr); - tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, - params->activation, cell_scratch); - if (use_cifg) { - tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, - forget_gate_scratch); - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr); - } else { - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr); - } - if (params->cell_clip > 0.0) { - tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell, - params->cell_clip, cell_state_ptr); - } - - // For each batch and cell: update the output gate. - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_output_weights_ptr, n_cell, cell_state_ptr, n_batch, - output_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, - output_gate_scratch); - tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell, - params->activation, cell_scratch); - tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, - n_batch * n_cell, output_gate_scratch); - - // For each batch: update the projection and output_state. 
- const bool use_projection_weight = (projection_weights_ptr != nullptr); - const bool use_projection_bias = (projection_bias_ptr != nullptr); - if (use_projection_weight) { - if (use_projection_bias) { - tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, - n_batch, output_ptr_time); - } else { - tensor_utils::ZeroVector(output_ptr_time, n_batch * n_output); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch, - output_ptr_time, /*result_stride=*/1); - if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output_ptr_time, n_batch * n_output, - params->proj_clip, output_ptr_time); - } - } else { - tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, - output_ptr_time); - } - tensor_utils::CopyVector(output_ptr_time, n_batch * n_output, - output_state_ptr); -} - // The LSTM Op engine. TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); @@ -756,7 +597,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const float* input_ptr_batch = input->data.f + t * n_batch * n_input; float* output_ptr_time = fw_output->data.f + t * n_batch * n_fw_output; - LstmBatchStep( + kernel_utils::LstmStep( input_ptr_batch, fw_input_to_input_weights_ptr, fw_input_to_forget_weights->data.f, fw_input_to_cell_weights->data.f, fw_input_to_output_weights->data.f, fw_recurrent_to_input_weights_ptr, @@ -766,11 +607,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { fw_cell_to_forget_weights_ptr, fw_cell_to_output_weights_ptr, fw_input_gate_bias_ptr, fw_forget_gate_bias->data.f, fw_cell_bias->data.f, fw_output_gate_bias->data.f, - fw_projection_weights_ptr, fw_projection_bias_ptr, params, fw_use_cifg, - fw_use_peephole, n_batch, n_fw_cell, n_input, n_fw_output, - fw_output_state->data.f, fw_cell_state->data.f, fw_input_gate_scratch, - fw_forget_gate_scratch, fw_cell_scratch, 
fw_output_gate_scratch, - output_ptr_time); + fw_projection_weights_ptr, fw_projection_bias_ptr, params, n_batch, + n_fw_cell, n_input, n_fw_output, fw_output_state->data.f, + fw_cell_state->data.f, fw_input_gate_scratch, fw_forget_gate_scratch, + fw_cell_scratch, fw_output_gate_scratch, output_ptr_time); } // n_cell and n_output will be the same size when there is no projection. @@ -828,7 +668,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const float* input_ptr_batch = input->data.f + t * n_batch * n_input; float* output_ptr_time = bw_output->data.f + t * n_batch * n_bw_output; - LstmBatchStep( + kernel_utils::LstmStep( input_ptr_batch, bw_input_to_input_weights_ptr, bw_input_to_forget_weights->data.f, bw_input_to_cell_weights->data.f, bw_input_to_output_weights->data.f, bw_recurrent_to_input_weights_ptr, @@ -838,11 +678,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { bw_cell_to_forget_weights_ptr, bw_cell_to_output_weights_ptr, bw_input_gate_bias_ptr, bw_forget_gate_bias->data.f, bw_cell_bias->data.f, bw_output_gate_bias->data.f, - bw_projection_weights_ptr, bw_projection_bias_ptr, params, bw_use_cifg, - bw_use_peephole, n_batch, n_bw_cell, n_input, n_bw_output, - bw_output_state->data.f, bw_cell_state->data.f, bw_input_gate_scratch, - bw_forget_gate_scratch, bw_cell_scratch, bw_output_gate_scratch, - output_ptr_time); + bw_projection_weights_ptr, bw_projection_bias_ptr, params, n_batch, + n_bw_cell, n_input, n_bw_output, bw_output_state->data.f, + bw_cell_state->data.f, bw_input_gate_scratch, bw_forget_gate_scratch, + bw_cell_scratch, bw_output_gate_scratch, output_ptr_time); } // Backward step. 
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc index 510395126c..f142374269 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc @@ -40,5 +40,152 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr, hidden_state_ptr_batch); } +void LstmStep( + const float* input_ptr_batch, const float* input_to_input_weights_ptr, + const float* input_to_forget_weights_ptr, + const float* input_to_cell_weights_ptr, + const float* input_to_output_weights_ptr, + const float* recurrent_to_input_weights_ptr, + const float* recurrent_to_forget_weights_ptr, + const float* recurrent_to_cell_weights_ptr, + const float* recurrent_to_output_weights_ptr, + const float* cell_to_input_weights_ptr, + const float* cell_to_forget_weights_ptr, + const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr, + const float* forget_gate_bias_ptr, const float* cell_bias_ptr, + const float* output_gate_bias_ptr, const float* projection_weights_ptr, + const float* projection_bias_ptr, const TfLiteLSTMParams* params, + int n_batch, int n_cell, int n_input, int n_output, float* output_state_ptr, + float* cell_state_ptr, float* input_gate_scratch, + float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, + float* output_ptr_batch) { + // Since we have already checked that weights are all there or none, we can + // check the existense of only one to the get the condition. + const bool use_cifg = (input_to_input_weights_ptr == nullptr); + const bool use_peephole = (cell_to_output_weights_ptr != nullptr); + // Initialize scratch buffers with bias. 
+ if (!use_cifg) { + tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch, + input_gate_scratch); + } + tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch, + forget_gate_scratch); + tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch, + cell_scratch); + tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch, + output_gate_scratch); + + // For each batch and cell: compute input_weight * input. + if (!use_cifg) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_input_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, + input_gate_scratch, /*result_stride=*/1); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_forget_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, + forget_gate_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_cell_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, + cell_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_output_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, + output_gate_scratch, /*result_stride=*/1); + + // For each batch and cell: compute recurrent_weight * output_state. 
+ if (!use_cifg) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_input_weights_ptr, n_cell, n_output, output_state_ptr, + n_batch, input_gate_scratch, + /*result_stride=*/1); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_forget_weights_ptr, n_cell, n_output, output_state_ptr, + n_batch, forget_gate_scratch, + /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_cell_weights_ptr, n_cell, n_output, output_state_ptr, + n_batch, cell_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_output_weights_ptr, n_cell, n_output, output_state_ptr, + n_batch, output_gate_scratch, + /*result_stride=*/1); + + // For each batch and cell: update input gate. + if (!use_cifg) { + if (use_peephole) { + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + cell_to_input_weights_ptr, n_cell, cell_state_ptr, n_batch, + input_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, + input_gate_scratch); + } + + // For each batch and cell: update forget gate. + if (use_peephole) { + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + cell_to_forget_weights_ptr, n_cell, cell_state_ptr, n_batch, + forget_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, + forget_gate_scratch); + + // For each batch and cell: update the cell. 
+ tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr, + n_batch * n_cell, cell_state_ptr); + tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, + params->activation, cell_scratch); + if (use_cifg) { + tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, + forget_gate_scratch); + tensor_utils::VectorVectorCwiseProductAccumulate( + cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr); + } else { + tensor_utils::VectorVectorCwiseProductAccumulate( + cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr); + } + if (params->cell_clip > 0.0) { + tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell, + params->cell_clip, cell_state_ptr); + } + + // For each batch and cell: update the output gate. + if (use_peephole) { + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + cell_to_output_weights_ptr, n_cell, cell_state_ptr, n_batch, + output_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, + output_gate_scratch); + tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell, + params->activation, cell_scratch); + tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, + n_batch * n_cell, output_gate_scratch); + + // For each batch: update the projection and output_state. 
+ const bool use_projection_weight = (projection_weights_ptr != nullptr); + const bool use_projection_bias = (projection_bias_ptr != nullptr); + if (use_projection_weight) { + if (use_projection_bias) { + tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, + n_batch, output_ptr_batch); + } else { + tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch, + output_ptr_batch, /*result_stride=*/1); + if (params->proj_clip > 0.0) { + tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output, + params->proj_clip, output_ptr_batch); + } + } else { + tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, + output_ptr_batch); + } + tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output, + output_state_ptr); +} + } // namespace kernel_utils } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h index 9872d4500b..3ec60ee57a 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h @@ -35,6 +35,42 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr, TfLiteFusedActivation activation, float* hidden_state_ptr_batch, float* output_ptr_batch); +// Performs an LSTM batch inference step for input specified by input_ptr_batch. +// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and +// biases (*_bias_ptr), and buffers (*_scratch), along with additional +// parameters: +// - params: various LSTM params including activation, clipping, etc., +// - n_batch: size of batch, +// - n_cell: number of cells (or units), +// - n_input: the input size, +// - n_output: the output size. +// +// The pointers to the cell and output state and the output are updated. 
Unless +// projection is specified output and output state contain the same data. +// +// The pointers with the suffix "_batch" point to data aligned in batch_major +// order, and each step processes batch_size many inputs from input_ptr_batch, +// and updates batch_size many cell and output states. +void LstmStep( + const float* input_ptr_batch, const float* input_to_input_weights_ptr, + const float* input_to_forget_weights_ptr, + const float* input_to_cell_weights_ptr, + const float* input_to_output_weights_ptr, + const float* recurrent_to_input_weights_ptr, + const float* recurrent_to_forget_weights_ptr, + const float* recurrent_to_cell_weights_ptr, + const float* recurrent_to_output_weights_ptr, + const float* cell_to_input_weights_ptr, + const float* cell_to_forget_weights_ptr, + const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr, + const float* forget_gate_bias_ptr, const float* cell_bias_ptr, + const float* output_gate_bias_ptr, const float* projection_weights_ptr, + const float* projection_bias_ptr, const TfLiteLSTMParams* params, + int n_batch, int n_cell, int n_input, int n_output, float* output_state_ptr, + float* cell_state_ptr, float* input_gate_scratch, + float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, + float* output_ptr_batch); + } // namespace kernel_utils } // namespace tflite #endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_KERNEL_UTILS_H_ diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc index 6c06264d84..b9255b23a5 100644 --- a/tensorflow/contrib/lite/kernels/lstm.cc +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h" #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" #include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/kernels/op_macros.h" @@ -377,127 +378,54 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch; } - // Initialize scratch buffers with bias. - if (!use_cifg) { - tensor_utils::VectorBatchVectorAssign(input_gate_bias->data.f, n_cell, - n_batch, input_gate_scratch); - } - tensor_utils::VectorBatchVectorAssign(forget_gate_bias->data.f, n_cell, - n_batch, forget_gate_scratch); - tensor_utils::VectorBatchVectorAssign(cell_bias->data.f, n_cell, n_batch, - cell_scratch); - tensor_utils::VectorBatchVectorAssign(output_gate_bias->data.f, n_cell, - n_batch, output_gate_scratch); - - // For each batch and cell: compute input_weight * input. - if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_input_weights->data.f, n_cell, n_input, input->data.f, n_batch, - input_gate_scratch, /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_forget_weights->data.f, n_cell, n_input, input->data.f, n_batch, - forget_gate_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_cell_weights->data.f, n_cell, n_input, input->data.f, n_batch, - cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_output_weights->data.f, n_cell, n_input, input->data.f, n_batch, - output_gate_scratch, /*result_stride=*/1); - - // For each batch and cell: compute recurrent_weight * output_state. 
- if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_input_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, input_gate_scratch, /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_forget_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, forget_gate_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_cell_weights->data.f, n_cell, n_output, output_state->data.f, - n_batch, cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_output_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, output_gate_scratch, /*result_stride=*/1); - - // For each batch and cell: update input gate. - if (!use_cifg) { - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_input_weights->data.f, n_cell, cell_state->data.f, n_batch, - input_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, - input_gate_scratch); - } - - // For each batch and cell: update forget gate. - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_forget_weights->data.f, n_cell, cell_state->data.f, n_batch, - forget_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, - forget_gate_scratch); - - // For each batch and cell: update the cell. 
- tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, - cell_state->data.f, n_batch * n_cell, - cell_state->data.f); - tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, - params->activation, cell_scratch); - if (use_cifg) { - tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, - forget_gate_scratch); - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, forget_gate_scratch, n_batch * n_cell, - cell_state->data.f); - } else { - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state->data.f); - } - if (params->cell_clip > 0.0) { - tensor_utils::ClipVector(cell_state->data.f, n_batch * n_cell, - params->cell_clip, cell_state->data.f); - } - - // For each batch and cell: update the output gate. - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_output_weights->data.f, n_cell, cell_state->data.f, n_batch, - output_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, - output_gate_scratch); - tensor_utils::ApplyActivationToVector(cell_state->data.f, n_batch * n_cell, - params->activation, cell_scratch); - tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, - n_batch * n_cell, output_gate_scratch); - - // For each batch: update the projection and output_state. 
- const bool use_projection_weight = (projection_weights != nullptr); - const bool use_projection_bias = (projection_bias != nullptr); - if (use_projection_weight) { - if (use_projection_bias) { - tensor_utils::VectorBatchVectorAssign(projection_bias->data.f, n_output, - n_batch, output->data.f); - } else { - tensor_utils::ZeroVector(output->data.f, n_batch * n_output); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights->data.f, n_output, n_cell, output_gate_scratch, - n_batch, output->data.f, /*result_stride=*/1); - if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output->data.f, n_batch * n_output, - params->proj_clip, output->data.f); - } - } else { - tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, - output->data.f); - } - tensor_utils::CopyVector(output->data.f, n_batch * n_output, - output_state->data.f); + // Check optional tensors, the respective pointers can be null. + const float* input_to_input_weights_ptr = + (use_cifg) ? nullptr : input_to_input_weights->data.f; + const float* recurrent_to_input_weights_ptr = + (use_cifg) ? nullptr : recurrent_to_input_weights->data.f; + const float* input_gate_bias_ptr = + (use_cifg) ? nullptr : input_gate_bias->data.f; + const float* cell_to_input_weights_ptr = + (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr; + const float* cell_to_forget_weights_ptr = + (use_peephole) ? cell_to_forget_weights->data.f : nullptr; + const float* cell_to_output_weights_ptr = + (use_peephole) ? cell_to_output_weights->data.f : nullptr; + const float* projection_weights_ptr = + (projection_weights == nullptr) ? nullptr : projection_weights->data.f; + const float* projection_bias_ptr = + (projection_bias == nullptr) ? nullptr : projection_bias->data.f; + + // Required tensors, pointers are non-null. 
+ const float* input_ptr_batch = input->data.f; + const float* input_to_forget_weights_ptr = input_to_forget_weights->data.f; + const float* input_to_cell_weights_ptr = input_to_cell_weights->data.f; + const float* input_to_output_weights_ptr = input_to_output_weights->data.f; + const float* recurrent_to_forget_weights_ptr = + recurrent_to_forget_weights->data.f; + const float* recurrent_to_cell_weights_ptr = + recurrent_to_cell_weights->data.f; + const float* recurrent_to_output_weights_ptr = + recurrent_to_output_weights->data.f; + const float* forget_gate_bias_ptr = forget_gate_bias->data.f; + const float* cell_bias_ptr = cell_bias->data.f; + const float* output_gate_bias_ptr = output_gate_bias->data.f; + + float* output_state_ptr = output_state->data.f; + float* cell_state_ptr = cell_state->data.f; + float* output_ptr_batch = output->data.f; + + kernel_utils::LstmStep( + input_ptr_batch, input_to_input_weights_ptr, input_to_forget_weights_ptr, + input_to_cell_weights_ptr, input_to_output_weights_ptr, + recurrent_to_input_weights_ptr, recurrent_to_forget_weights_ptr, + recurrent_to_cell_weights_ptr, recurrent_to_output_weights_ptr, + cell_to_input_weights_ptr, cell_to_forget_weights_ptr, + cell_to_output_weights_ptr, input_gate_bias_ptr, forget_gate_bias_ptr, + cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr, + projection_bias_ptr, params, n_batch, n_cell, n_input, n_output, + output_state_ptr, cell_state_ptr, input_gate_scratch, forget_gate_scratch, + cell_scratch, output_gate_scratch, output_ptr_batch); return kTfLiteOk; } diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc index 9cdb58714e..508a570e2e 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h" #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" #include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/kernels/op_macros.h" @@ -380,135 +381,57 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch; } + // Check optional tensors, the respective pointers can be null. + const float* input_to_input_weights_ptr = + (use_cifg) ? nullptr : input_to_input_weights->data.f; + const float* recurrent_to_input_weights_ptr = + (use_cifg) ? nullptr : recurrent_to_input_weights->data.f; + const float* input_gate_bias_ptr = + (use_cifg) ? nullptr : input_gate_bias->data.f; + const float* cell_to_input_weights_ptr = + (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr; + const float* cell_to_forget_weights_ptr = + (use_peephole) ? cell_to_forget_weights->data.f : nullptr; + const float* cell_to_output_weights_ptr = + (use_peephole) ? cell_to_output_weights->data.f : nullptr; + const float* projection_weights_ptr = + (projection_weights == nullptr) ? nullptr : projection_weights->data.f; + const float* projection_bias_ptr = + (projection_bias == nullptr) ? nullptr : projection_bias->data.f; + + // Required tensors, pointers are non-null. 
+ const float* input_to_forget_weights_ptr = input_to_forget_weights->data.f; + const float* input_to_cell_weights_ptr = input_to_cell_weights->data.f; + const float* input_to_output_weights_ptr = input_to_output_weights->data.f; + const float* recurrent_to_forget_weights_ptr = + recurrent_to_forget_weights->data.f; + const float* recurrent_to_cell_weights_ptr = + recurrent_to_cell_weights->data.f; + const float* recurrent_to_output_weights_ptr = + recurrent_to_output_weights->data.f; + const float* forget_gate_bias_ptr = forget_gate_bias->data.f; + const float* cell_bias_ptr = cell_bias->data.f; + const float* output_gate_bias_ptr = output_gate_bias->data.f; + + float* output_state_ptr = output_state->data.f; + float* cell_state_ptr = cell_state->data.f; + for (int t = 0; t < max_time; t++) { - const float* input_ptr_time = input->data.f + t * n_batch * n_input; - // Initialize scratch buffers with bias. - if (!use_cifg) { - tensor_utils::VectorBatchVectorAssign(input_gate_bias->data.f, n_cell, - n_batch, input_gate_scratch); - } - tensor_utils::VectorBatchVectorAssign(forget_gate_bias->data.f, n_cell, - n_batch, forget_gate_scratch); - tensor_utils::VectorBatchVectorAssign(cell_bias->data.f, n_cell, n_batch, - cell_scratch); - tensor_utils::VectorBatchVectorAssign(output_gate_bias->data.f, n_cell, - n_batch, output_gate_scratch); - - // For each batch and cell: compute input_weight * input. 
- if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_input_weights->data.f, n_cell, n_input, input_ptr_time, - n_batch, input_gate_scratch, /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_forget_weights->data.f, n_cell, n_input, input_ptr_time, - n_batch, forget_gate_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_cell_weights->data.f, n_cell, n_input, input_ptr_time, n_batch, - cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_output_weights->data.f, n_cell, n_input, input_ptr_time, - n_batch, output_gate_scratch, /*result_stride=*/1); - - // For each batch and cell: compute recurrent_weight * output_state. - if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_input_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, input_gate_scratch, - /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_forget_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, forget_gate_scratch, - /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_cell_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_output_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, output_gate_scratch, - /*result_stride=*/1); - - // For each batch and cell: update input gate. - if (!use_cifg) { - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_input_weights->data.f, n_cell, cell_state->data.f, n_batch, - input_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, - input_gate_scratch); - } - - // For each batch and cell: update forget gate. 
- if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_forget_weights->data.f, n_cell, cell_state->data.f, n_batch, - forget_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, - forget_gate_scratch); - - // For each batch and cell: update the cell. - tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, - cell_state->data.f, n_batch * n_cell, - cell_state->data.f); - tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, - params->activation, cell_scratch); - if (use_cifg) { - tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, - forget_gate_scratch); - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, forget_gate_scratch, n_batch * n_cell, - cell_state->data.f); - } else { - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, input_gate_scratch, n_batch * n_cell, - cell_state->data.f); - } - if (params->cell_clip > 0.0) { - tensor_utils::ClipVector(cell_state->data.f, n_batch * n_cell, - params->cell_clip, cell_state->data.f); - } - - // For each batch and cell: update the output gate. - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_output_weights->data.f, n_cell, cell_state->data.f, n_batch, - output_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, - output_gate_scratch); - tensor_utils::ApplyActivationToVector(cell_state->data.f, n_batch * n_cell, - params->activation, cell_scratch); - tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, - n_batch * n_cell, - output_gate_scratch); - - // For each batch: update the projection and output_state. 
- const bool use_projection_weight = (projection_weights != nullptr); - const bool use_projection_bias = (projection_bias != nullptr); - float* output_ptr_time = output->data.f + t * n_batch * n_output; - if (use_projection_weight) { - if (use_projection_bias) { - tensor_utils::VectorBatchVectorAssign(projection_bias->data.f, n_output, - n_batch, output_ptr_time); - } else { - tensor_utils::ZeroVector(output_ptr_time, n_batch * n_output); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights->data.f, n_output, n_cell, output_gate_scratch, - n_batch, output_ptr_time, /*result_stride=*/1); - if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output_ptr_time, n_batch * n_output, - params->proj_clip, output_ptr_time); - } - } else { - tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, - output_ptr_time); - } - tensor_utils::CopyVector(output_ptr_time, n_batch * n_output, - output_state->data.f); + const float* input_ptr_batch = input->data.f + t * n_batch * n_input; + float* output_ptr_batch = output->data.f + t * n_batch * n_output; + + kernel_utils::LstmStep( + input_ptr_batch, input_to_input_weights_ptr, + input_to_forget_weights_ptr, input_to_cell_weights_ptr, + input_to_output_weights_ptr, recurrent_to_input_weights_ptr, + recurrent_to_forget_weights_ptr, recurrent_to_cell_weights_ptr, + recurrent_to_output_weights_ptr, cell_to_input_weights_ptr, + cell_to_forget_weights_ptr, cell_to_output_weights_ptr, + input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr, + output_gate_bias_ptr, projection_weights_ptr, projection_bias_ptr, + params, n_batch, n_cell, n_input, n_output, output_state_ptr, + cell_state_ptr, input_gate_scratch, forget_gate_scratch, cell_scratch, + output_gate_scratch, output_ptr_batch); } return kTfLiteOk; } -- GitLab From c1777a2633bd5615a1d654e50f82d0cf75fd60f0 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 28 Feb 2018 13:17:06 -0800 Subject: [PATCH 162/311] [XLA] Fix up some error 
messages to conform to XLA's error message style. PiperOrigin-RevId: 187372860 --- tensorflow/compiler/xla/literal_util.cc | 18 +++++++++++------- tensorflow/compiler/xla/literal_util_test.cc | 10 +++++----- .../compiler/xla/service/allocation_tracker.cc | 2 +- .../compiler/xla/service/hlo_instruction.cc | 6 ++++-- .../xla/tests/deconstruct_tuple_test.cc | 2 +- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 3962a9b316..c3eb8caa57 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -343,7 +343,7 @@ Status Literal::Piece::CopyFrom(const Literal::Piece& src) { #undef COPY_ELEMENTS default: return Unimplemented( - "Unhandled primitive type %s", + "Copying a Literal object with element type %s is not implemented.", PrimitiveType_Name(subshape().element_type()).c_str()); } } @@ -491,7 +491,10 @@ Status Literal::CopySliceFrom(const Literal& src_literal, default: break; } - return Unimplemented("Unhandled primitive type %d", shape().element_type()); + return Unimplemented( + "Copying a slice from a Literal object with element type %d is not " + "implemented.", + shape().element_type()); } /* static */ Literal Literal::Zero(PrimitiveType primitive_type) { @@ -1394,8 +1397,8 @@ StatusOr> ConvertIfDestTypeMatches( return ConvertToC64(src_literal); // Other types are not yet supported. default: - return InvalidArgument( - "Unimplemented: Convert from type %s to type %s", + return Unimplemented( + "Converting from type %s to type %s is not implemented.", PrimitiveType_Name(src_literal.shape().element_type()).c_str(), PrimitiveType_Name(primitive_dest_type).c_str()); } @@ -1424,9 +1427,10 @@ StatusOr> Literal::Convert( #undef CONVERT_IF_DEST_TYPE_MATCHES // Other types are not yet supported. 
default: - return InvalidArgument("Unimplemented: Convert from type %s to type %s", - PrimitiveType_Name(shape().element_type()).c_str(), - PrimitiveType_Name(primitive_dest_type).c_str()); + return Unimplemented( + "Converting from type %s to type %s is not implemented.", + PrimitiveType_Name(shape().element_type()).c_str(), + PrimitiveType_Name(primitive_dest_type).c_str()); } } diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 9ff0771110..04e45f0049 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -1232,15 +1232,15 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { EXPECT_EQ(*conv, *c64); EXPECT_EQ(s32->Convert(TUPLE).status().code(), - tensorflow::error::INVALID_ARGUMENT); + tensorflow::error::UNIMPLEMENTED); EXPECT_EQ(s32->Convert(S16).status().code(), - tensorflow::error::INVALID_ARGUMENT); + tensorflow::error::UNIMPLEMENTED); EXPECT_EQ(s32->Convert(U16).status().code(), - tensorflow::error::INVALID_ARGUMENT); + tensorflow::error::UNIMPLEMENTED); EXPECT_EQ(c64->Convert(F32).status().code(), - tensorflow::error::INVALID_ARGUMENT); + tensorflow::error::UNIMPLEMENTED); EXPECT_EQ(c64->Convert(S32).status().code(), - tensorflow::error::INVALID_ARGUMENT); + tensorflow::error::UNIMPLEMENTED); } TEST_F(LiteralUtilTest, CopyFromProto_Bool) { diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc index 4e80679c11..7a75c02531 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.cc +++ b/tensorflow/compiler/xla/service/allocation_tracker.cc @@ -109,7 +109,7 @@ StatusOr> AllocationTracker::DeconstructTuple( TF_RET_CHECK(ShapeUtil::IsTuple(shaped_buffer->on_device_shape())); if (ShapeUtil::IsNestedTuple(shaped_buffer->on_device_shape())) { - return Unimplemented("deconstructing nested tuples not yet supported"); + return Unimplemented("Deconstructing nested tuples is not 
implemented."); } std::vector element_handles; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index a534d8ff06..af9d772b00 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2682,8 +2682,10 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { case HloOpcode::kTrace: break; } - return Unimplemented("unhandled HloOpcode for DfsHloVisitor: %s", - HloOpcodeString(opcode_).c_str()); + return InternalError( + "Unhandled HloOpcode for DfsHloVisitor: %s. This should not happen - " + "please file a bug for XLA.", + HloOpcodeString(opcode_).c_str()); } // Explicit instantiations. diff --git a/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc b/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc index 032c06cd3c..3ab0ea4ad4 100644 --- a/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc +++ b/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc @@ -195,7 +195,7 @@ XLA_TEST_F(DeconstructTupleTest, DeconstructNestedTuple) { auto result_status = client_->DeconstructTuple(*global_data); EXPECT_FALSE(result_status.ok()); EXPECT_THAT(result_status.status().error_message(), - HasSubstr("deconstructing nested tuples not yet supported")); + HasSubstr("Deconstructing nested tuples is not implemented")); } } // namespace -- GitLab From c661f2c3de75e3ad58bce52b39b8cc2e7ee07c0e Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 28 Feb 2018 13:19:01 -0800 Subject: [PATCH 163/311] [TF:XLA] Bump open source llvm revision to r326313 PiperOrigin-RevId: 187373178 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index fa3671b4c9..ea8f42ab8d 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - 
"https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/832f2bf0d8908aea8160bab128708d521764fe8d.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/832f2bf0d8908aea8160bab128708d521764fe8d.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/9a6e78e4adc959d2825f7af35b4ed0e09394d840.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/9a6e78e4adc959d2825f7af35b4ed0e09394d840.tar.gz", ], - sha256 = "e6bb793bbdce37ee5643789a27d174f1cdd8e7323a69d5f331376eb34755ee0d", - strip_prefix = "llvm-832f2bf0d8908aea8160bab128708d521764fe8d", + sha256 = "7990b4d446de971e0acc481942920452a182d2f87a8164bdc117fd9b9ace591d", + strip_prefix = "llvm-9a6e78e4adc959d2825f7af35b4ed0e09394d840", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From 0f3105c39b079d8e7741e48e3b098c47c81a453a Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Wed, 28 Feb 2018 13:43:42 -0800 Subject: [PATCH 164/311] [XLA] Add a HLO simplifier pass to fold Conditional(constant_predicate, true_computation, false_computation) to Call(predicated_computation) and finally inlined computation. 
PiperOrigin-RevId: 187376657 --- tensorflow/compiler/xla/service/BUILD | 35 ++++ .../xla/service/conditional_simplifier.cc | 106 ++++++++++++ .../xla/service/conditional_simplifier.h | 38 +++++ .../service/conditional_simplifier_test.cc | 153 ++++++++++++++++++ tensorflow/compiler/xla/service/cpu/BUILD | 1 + .../compiler/xla/service/cpu/cpu_compiler.cc | 2 + tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../compiler/xla/service/gpu/gpu_compiler.cc | 2 + 8 files changed, 338 insertions(+) create mode 100644 tensorflow/compiler/xla/service/conditional_simplifier.cc create mode 100644 tensorflow/compiler/xla/service/conditional_simplifier.h create mode 100644 tensorflow/compiler/xla/service/conditional_simplifier_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index e6a6e54927..e4ae812532 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1213,6 +1213,41 @@ tf_cc_test( ], ) +cc_library( + name = "conditional_simplifier", + srcs = ["conditional_simplifier.cc"], + hdrs = ["conditional_simplifier.h"], + deps = [ + ":call_inliner", + ":hlo", + ":hlo_pass", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "conditional_simplifier_test", + srcs = ["conditional_simplifier_test.cc"], + deps = [ + ":conditional_simplifier", + ":hlo", + ":hlo_matchers", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/tests:hlo_verified_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + cc_library( name = "while_loop_simplifier", srcs = ["while_loop_simplifier.cc"], diff --git 
a/tensorflow/compiler/xla/service/conditional_simplifier.cc b/tensorflow/compiler/xla/service/conditional_simplifier.cc new file mode 100644 index 0000000000..f35de08085 --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_simplifier.cc @@ -0,0 +1,106 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/conditional_simplifier.h" + +#include +#include +#include + +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/call_inliner.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" + +namespace xla { + +// Tries to replace a conditional with a call operation of the corresponding +// computation. If the given conditional has a constant predicate, tries to +// replace it with a call to its true/false computation as appropriate and then +// inline that computation. +// +// Returns true if it made a change to the graph. 
+static StatusOr TryRemoveConditional(HloInstruction* conditional) { + CHECK_EQ(conditional->opcode(), HloOpcode::kConditional); + // Do not remove conditionals that contain side-effecting instructions or + // have control predecessors/successors in either true/false computation. + if (!conditional->parent()->IsRemovable(conditional) || + conditional->HasSideEffect()) { + VLOG(2) << "Not attempting to remove conditional as it is not removable or " + "has side effect: " + << conditional->ToShortString(); + return false; + } + + if (conditional->operand(0)->opcode() != HloOpcode::kConstant) { + VLOG(2) << "Not attempting to remove conditional as its predicate is not a " + "compile-time constant: " + << conditional->ToShortString(); + return false; + } + + auto computation = conditional->parent(); + HloInstruction* call_op; + if (conditional->operand(0)->literal().Get({})) { + call_op = computation->AddInstruction(HloInstruction::CreateCall( + conditional->shape(), {conditional->mutable_operand(1)}, + conditional->true_computation())); + } else { + call_op = computation->AddInstruction(HloInstruction::CreateCall( + conditional->shape(), {conditional->mutable_operand(2)}, + conditional->false_computation())); + } + + TF_RETURN_IF_ERROR(computation->ReplaceInstruction(conditional, call_op)); + TF_RETURN_IF_ERROR(CallInliner::Inline(call_op).status()); + + return true; +} + +StatusOr ConditionalSimplifier::Run(HloModule* module) { + XLA_VLOG_LINES( + 3, "ConditionalSimplifier::Run(), before:\n" + module->ToString()); + bool changed = false; + + // Gather all the conditional ops in our module. We do this ahead of time so + // we don't have to worry about mutating the lists of computations or + // instructions as we iterate. 
+ std::vector conditional_ops; + for (auto* comp : module->computations()) { + for (auto* instr : comp->instructions()) { + if (instr->opcode() == HloOpcode::kConditional) { + conditional_ops.push_back(instr); + } + } + } + + for (HloInstruction* conditional_op : conditional_ops) { + TF_ASSIGN_OR_RETURN(bool result, TryRemoveConditional(conditional_op)); + changed |= result; + } + + XLA_VLOG_LINES(3, + "ConditionalSimplifier::Run(), after:\n" + module->ToString()); + return changed; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.h b/tensorflow/compiler/xla/service/conditional_simplifier.h new file mode 100644 index 0000000000..063261e26d --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_simplifier.h @@ -0,0 +1,38 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_SIMPLIFIER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_SIMPLIFIER_H_ + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/stringpiece.h" + +namespace xla { + +// HLO pass that removes kConditional with a constant predicate, replacing them +// with their true or false computation as appropriate. 
+class ConditionalSimplifier : public HloPassInterface { + public: + tensorflow::StringPiece name() const override { + return "simplify-conditional"; + } + StatusOr Run(HloModule* module) override; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_SIMPLIFIER_H_ diff --git a/tensorflow/compiler/xla/service/conditional_simplifier_test.cc b/tensorflow/compiler/xla/service/conditional_simplifier_test.cc new file mode 100644 index 0000000000..868348547d --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_simplifier_test.cc @@ -0,0 +1,153 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/conditional_simplifier.h" + +#include +#include + +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +namespace op = xla::testing::opcode_matchers; + +class ConditionalSimplifierTest : public HloVerifiedTestBase { + public: + // Makes a computation that contains a conditional with constant predicate. + HloComputation* MakeConditional(HloModule* module); +}; + +HloComputation* ConditionalSimplifierTest::MakeConditional(HloModule* module) { + HloComputation::Builder builder(TestName()); + + // true_computation returns param+1. + HloComputation* true_computation; + { + HloComputation::Builder true_computation_builder(TestName() + + ".true_computation"); + auto param = + true_computation_builder.AddInstruction(HloInstruction::CreateParameter( + 0, ShapeUtil::MakeShape(S32, {}), "param")); + auto one = true_computation_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1))); + + true_computation_builder.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(S32, {}), HloOpcode::kAdd, param, one)); + + true_computation = + module->AddEmbeddedComputation(true_computation_builder.Build()); + } + + // false_computation returns param+42. 
+ HloComputation* false_computation; + { + HloComputation::Builder false_computation_builder(TestName() + + ".false_computation"); + auto param = false_computation_builder.AddInstruction( + HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(S32, {}), + "param")); + auto forty_two = false_computation_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42))); + + false_computation_builder.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(S32, {}), HloOpcode::kAdd, param, forty_two)); + false_computation = + module->AddEmbeddedComputation(false_computation_builder.Build()); + } + + auto false_instrn = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(false))); + auto false_param = builder.AddInstruction(HloInstruction::CreateParameter( + 0, ShapeUtil::MakeShape(S32, {}), "false_param")); + auto one = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1))); + + builder.AddInstruction(HloInstruction::CreateConditional( + ShapeUtil::MakeShape(S32, {}), false_instrn, one, true_computation, + false_param, false_computation)); + + return module->AddEntryComputation(builder.Build()); +} + +TEST_F(ConditionalSimplifierTest, ConditionalGetsInlined) { + HloComputation* computation = MakeConditional(&module()); + ASSERT_TRUE(ConditionalSimplifier().Run(&module()).ValueOrDie()); + EXPECT_THAT(computation->root_instruction(), + op::Add(op::Parameter(), op::Constant())); +} + +TEST_F(ConditionalSimplifierTest, ConditionalWithControlDependency) { + HloComputation* computation = MakeConditional(&module()); + + auto* true_op = computation->AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(true))); + TF_ASSERT_OK( + true_op->AddControlDependencyTo(computation->root_instruction())); + + EXPECT_FALSE(ConditionalSimplifier().Run(&module()).ValueOrDie()); +} + +TEST_F(ConditionalSimplifierTest, NotRemovedIfContainsSend) { + HloComputation* computation = 
MakeConditional(&module()); + auto* conditional = computation->root_instruction(); + ASSERT_EQ(conditional->opcode(), HloOpcode::kConditional); + + auto* true_computation = conditional->true_computation(); + auto* send = true_computation->AddInstruction(HloInstruction::CreateSend( + true_computation->AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(true))), + /*channel_id=*/0)); + true_computation->AddInstruction(HloInstruction::CreateSendDone(send)); + EXPECT_FALSE(ConditionalSimplifier().Run(&module()).ValueOrDie()); +} + +TEST_F(ConditionalSimplifierTest, NotRemovedIfContainsRecv) { + HloComputation* computation = MakeConditional(&module()); + auto* conditional = computation->root_instruction(); + ASSERT_EQ(conditional->opcode(), HloOpcode::kConditional); + + auto* true_computation = conditional->true_computation(); + auto* recv = true_computation->AddInstruction(HloInstruction::CreateRecv( + ShapeUtil::MakeShape(F32, {1}), /*channel_id=*/0)); + true_computation->AddInstruction(HloInstruction::CreateRecvDone(recv)); + EXPECT_FALSE(ConditionalSimplifier().Run(&module()).ValueOrDie()); +} + +TEST_F(ConditionalSimplifierTest, NotRemovedIfContainsNonRemovableInstruction) { + HloComputation* computation = MakeConditional(&module()); + auto* conditional = computation->root_instruction(); + ASSERT_EQ(conditional->opcode(), HloOpcode::kConditional); + auto* false_computation = conditional->false_computation(); + false_computation->AddInstruction( + HloInstruction::CreateInfeed(ShapeUtil::MakeShape(F32, {1}), "config")); + EXPECT_FALSE(ConditionalSimplifier().Run(&module()).ValueOrDie()); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 4170e31527..38a54fcb64 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -105,6 +105,7 @@ cc_library( "//tensorflow/compiler/xla/service:buffer_assignment", 
"//tensorflow/compiler/xla/service:buffer_liveness", "//tensorflow/compiler/xla/service:call_inliner", + "//tensorflow/compiler/xla/service:conditional_simplifier", "//tensorflow/compiler/xla/service:dot_decomposer", "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:flatten_call_graph", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 387806e24a..0d15be5a23 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -47,6 +47,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/call_inliner.h" +#include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/cpu/compiler_functor.h" #include "tensorflow/compiler/xla/service/cpu/conv_canonicalization.h" #include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h" @@ -275,6 +276,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) { pass.AddPass(); pass.AddPass(); pass.AddPass(); + pass.AddPass(); } pipeline.AddPass( [](const HloInstruction& dot, diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 9da4fb97fa..334efff1e6 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -510,6 +510,7 @@ cc_library( "//tensorflow/compiler/xla/service:buffer_assignment", "//tensorflow/compiler/xla/service:buffer_liveness", "//tensorflow/compiler/xla/service:call_inliner", + "//tensorflow/compiler/xla/service:conditional_simplifier", "//tensorflow/compiler/xla/service:dot_decomposer", "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:flatten_call_graph", diff --git 
a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 28ebd034ee..9e37acdf31 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/call_inliner.h" +#include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/dot_decomposer.h" #include "tensorflow/compiler/xla/service/flatten_call_graph.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h" @@ -176,6 +177,7 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module, pass.AddPass(); pass.AddPass(); pass.AddPass(); + pass.AddPass(); } pipeline.AddPass( -- GitLab From 9d6c5a06638262f6815717c682fab29ba3524282 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 28 Feb 2018 13:48:38 -0800 Subject: [PATCH 165/311] Bypasses warnings in eager mode for converting indexed slices to tensors. PiperOrigin-RevId: 187377370 --- tensorflow/python/ops/gradients_impl.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 1418c0b10f..227316a01e 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -86,17 +86,19 @@ def _IndexedSlicesToTensor(value, dtype=None, name=None, as_ref=False): % str(value)) # TODO(mrry): Consider adding static shape information to # IndexedSlices, to avoid using numpy here. 
- dense_shape_value = tensor_util.constant_value(value.dense_shape) - if dense_shape_value is not None: - num_elements = np.prod(dense_shape_value) - if num_elements >= _LARGE_SPARSE_NUM_ELEMENTS: + if context.in_graph_mode(): + dense_shape_value = tensor_util.constant_value(value.dense_shape) + if dense_shape_value is not None: + num_elements = np.prod(dense_shape_value) + if num_elements >= _LARGE_SPARSE_NUM_ELEMENTS: + warnings.warn( + "Converting sparse IndexedSlices to a dense Tensor with %d " + "elements. This may consume a large amount of memory." % + num_elements) + else: warnings.warn( - "Converting sparse IndexedSlices to a dense Tensor with %d elements. " - "This may consume a large amount of memory." % num_elements) - else: - warnings.warn( - "Converting sparse IndexedSlices to a dense Tensor of unknown shape. " - "This may consume a large amount of memory.") + "Converting sparse IndexedSlices to a dense Tensor of unknown shape. " + "This may consume a large amount of memory.") return math_ops.unsorted_segment_sum( value.values, value.indices, value.dense_shape[0], name=name) -- GitLab From a72ece230eb46c1afcb96c52dc5ae6ceabdeaf25 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 28 Feb 2018 13:55:35 -0800 Subject: [PATCH 166/311] Checkpointable: Handle Optimizer non-slot variables Overrides the Checkpointable dependency-gathering logic to key Optimizer dependencies to the current graph. Moves my Checkpointable Adam prototype out of contrib. Right now there is no check that loading all happens in the same graph. This would be easy enough to do (save a Graph ID with the _Checkpoint object), but it's not clear to me that it's useful; doing deferred restoration in whichever graph the variable is created in seems reasonable. 
(Let me know if you disagree) PiperOrigin-RevId: 187378372 --- .../eager/python/checkpointable_utils_test.py | 115 +++++++++++------- tensorflow/python/ops/variables.py | 3 + tensorflow/python/training/checkpointable.py | 96 ++++++++++++--- tensorflow/python/training/optimizer.py | 48 +++++++- ...tensorflow.train.-adadelta-optimizer.pbtxt | 1 - ...sorflow.train.-adagrad-d-a-optimizer.pbtxt | 1 - .../tensorflow.train.-adagrad-optimizer.pbtxt | 1 - .../tensorflow.train.-adam-optimizer.pbtxt | 1 - .../tensorflow.train.-ftrl-optimizer.pbtxt | 1 - ...ow.train.-gradient-descent-optimizer.pbtxt | 1 - ...tensorflow.train.-momentum-optimizer.pbtxt | 1 - .../golden/tensorflow.train.-optimizer.pbtxt | 1 - ...ow.train.-proximal-adagrad-optimizer.pbtxt | 1 - ...-proximal-gradient-descent-optimizer.pbtxt | 1 - ...nsorflow.train.-r-m-s-prop-optimizer.pbtxt | 1 - ...rflow.train.-sync-replicas-optimizer.pbtxt | 1 - 16 files changed, 196 insertions(+), 78 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 68f0d93632..7367f1b71c 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -23,6 +23,7 @@ import six from tensorflow.contrib.eager.python import checkpointable_utils from tensorflow.contrib.eager.python import network as network_lib +from tensorflow.python.client import session as session_lib from tensorflow.python.eager import context from tensorflow.python.eager import test from tensorflow.python.framework import constant_op @@ -56,40 +57,6 @@ class CheckpointableNetwork(network_lib.Network, checkpointable.Checkpointable): return super(CheckpointableNetwork, self).track_layer(layer) -class CheckpointableAdam(adam.AdamOptimizer, checkpointable.Checkpointable): - - # NOTE: Copied from Optimizer with modifications to use add_variable - # for non-slot variables. 
These contortions are necessary to maintain - # checkpoint compatibility with variable.name based saving. - # TODO(allenl): Make this cleaner. - def _create_non_slot_variable(self, initial_value, name, colocate_with): - """Add an extra variable, not associated with a slot.""" - if context.in_graph_mode(): - graph = colocate_with.graph - else: - graph = None - - key = (name, graph) - v = self._non_slot_dict.get(key, None) - if v is None: - with ops.colocate_with(colocate_with): - def _variable_getter(name, shape, dtype, initializer): - del shape, dtype # not used, but there for compatibility - return variable_scope.variable( - name=name, initial_value=initializer, trainable=False) - - initial_value = ops.convert_to_tensor(initial_value) - v = self._add_variable_with_custom_getter( - name=name, - shape=initial_value.get_shape(), - initializer=initial_value, - getter=_variable_getter) - - self._non_slot_dict[key] = v - - return v - - class NonLayerCheckpointable(checkpointable.Checkpointable): def __init__(self): @@ -208,7 +175,7 @@ class CheckpointingTests(test.TestCase): # A nuisance Network using the same optimizer. Its slot variables should not # go in the checkpoint, since it is never depended on. 
other_network = MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=optimizer_step) @@ -314,7 +281,7 @@ class CheckpointingTests(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testSaveRestore(self): network = MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network) input_value = constant_op.constant([[3.]]) @@ -346,7 +313,7 @@ class CheckpointingTests(test.TestCase): if context.in_graph_mode(): return # Restore-on-create is only supported when executing eagerly on_create_network = MyNetwork() - on_create_optimizer = CheckpointableAdam(0.001) + on_create_optimizer = adam.AdamOptimizer(0.001) on_create_root = checkpointable_utils.Checkpoint( optimizer=on_create_optimizer, network=on_create_network) # Deferred restoration @@ -378,7 +345,7 @@ class CheckpointingTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): network = MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=training_util.get_or_create_global_step()) @@ -402,7 +369,7 @@ class CheckpointingTests(test.TestCase): for training_continuation in range(3): with ops.Graph().as_default(): network = MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, global_step=training_util.get_or_create_global_step()) @@ -439,7 +406,7 @@ class CheckpointingTests(test.TestCase): with ops.Graph().as_default(), self.test_session( graph=ops.get_default_graph()): network = 
MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, global_step=training_util.get_or_create_global_step()) @@ -573,7 +540,7 @@ class CheckpointingTests(test.TestCase): root = checkpointable.Checkpointable() root.var = checkpointable_utils.add_variable( root, name="var", initializer=0.) - optimizer = CheckpointableAdam(0.1) + optimizer = adam.AdamOptimizer(0.1) if context.in_graph_mode(): train_op = optimizer.minimize(root.var) # Note that `optimizer` has not been added as a dependency of @@ -607,7 +574,7 @@ class CheckpointingTests(test.TestCase): no_slot_status.assert_consumed() no_slot_status.run_restore_ops() self.assertEqual(12., self.evaluate(new_root.var)) - new_root.optimizer = CheckpointableAdam(0.1) + new_root.optimizer = adam.AdamOptimizer(0.1) with self.assertRaisesRegexp(AssertionError, "beta1_power"): slot_status.assert_consumed() self.assertEqual(12., self.evaluate(new_root.var)) @@ -819,7 +786,7 @@ class CheckpointingTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") obj = checkpointable.Checkpointable() obj.var = variable_scope.get_variable(name="v", initializer=0.) - obj.opt = CheckpointableAdam(0.1) + obj.opt = adam.AdamOptimizer(0.1) obj.opt.minimize(obj.var.read_value()) self.evaluate(checkpointable_utils.gather_initializers(obj)) saver = checkpointable_utils.CheckpointableSaver(obj) @@ -837,7 +804,7 @@ class CheckpointingTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") obj = checkpointable.Checkpointable() obj.var = variable_scope.get_variable(name="v", initializer=0.) 
- obj.opt = CheckpointableAdam(0.1) + obj.opt = adam.AdamOptimizer(0.1) obj.opt.minimize(obj.var.read_value()) self.evaluate(checkpointable_utils.gather_initializers(obj)) saver = checkpointable_utils.CheckpointableSaver(obj) @@ -847,13 +814,71 @@ class CheckpointingTests(test.TestCase): saver.restore(save_path) self.assertEqual(before_ops, graph.get_operations()) + def testMultipleGraphsNonSlotVariables(self): + with context.graph_mode(): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + optimizer = adam.AdamOptimizer(0.001) + # Construct a model in one graph + first_graph = ops.Graph() + first_session = session_lib.Session(graph=first_graph) + with first_graph.as_default(), first_session.as_default(): + first_variable = resource_variable_ops.ResourceVariable([1.]) + first_root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, variable=first_variable) + train_op = optimizer.minimize(first_variable.read_value) + self.evaluate(checkpointable_utils.gather_initializers( + first_root_checkpointable)) + self.evaluate(train_op) + self.evaluate(first_variable.assign([1.])) + self.evaluate(optimizer.get_slot( + var=first_variable, name="m").assign([2.])) + beta1_power, _ = optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(3.)) + + # Save and load in a second graph + second_graph = ops.Graph() + with second_graph.as_default(), session_lib.Session(graph=second_graph): + second_variable = resource_variable_ops.ResourceVariable([1.]) + second_root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, variable=second_variable) + train_op = optimizer.minimize(second_variable.read_value) + second_root_checkpointable.restore(None).initialize_or_restore() + self.evaluate(train_op) + self.evaluate(second_variable.assign([4.])) + self.evaluate(optimizer.get_slot( + var=second_variable, name="m").assign([5.])) + beta1_power, _ = optimizer._get_beta_accumulators() 
+ self.evaluate(beta1_power.assign(6.)) + save_path = second_root_checkpointable.save(checkpoint_prefix) + self.evaluate(second_variable.assign([7.])) + self.evaluate(optimizer.get_slot( + var=second_variable, name="m").assign([8.])) + beta1_power, _ = optimizer._get_beta_accumulators() + self.assertAllEqual(6., self.evaluate(beta1_power)) + status = second_root_checkpointable.restore(save_path) + status.assert_consumed().run_restore_ops() + self.assertAllEqual([4.], self.evaluate(second_variable)) + self.assertAllEqual([5.], self.evaluate(optimizer.get_slot( + var=second_variable, name="m"))) + beta1_power, _ = optimizer._get_beta_accumulators() + self.assertAllEqual(6., self.evaluate(beta1_power)) + + # Check that the first graph is unmolested + with first_graph.as_default(), first_session.as_default(): + self.assertAllEqual([1.], self.evaluate(first_variable)) + self.assertAllEqual([2.], self.evaluate(optimizer.get_slot( + var=first_variable, name="m"))) + beta1_power, _ = optimizer._get_beta_accumulators() + self.assertAllEqual(3., self.evaluate(beta1_power)) + class CheckpointCompatibilityTests(test.TestCase): def _initialized_model(self): input_value = constant_op.constant([[3.]]) network = MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=optimizer_step) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index d382683858..643a3b7edc 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -307,6 +307,9 @@ class Variable(checkpointable.CheckpointableBase): if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") + # Store the graph key so optimizers know how to only retrieve variables from + # this graph. 
+ self._graph_key = ops.get_default_graph()._graph_key # pylint: disable=protected-access if isinstance(initial_value, checkpointable.CheckpointInitialValue): self._maybe_initialize_checkpointable() self._update_uid = initial_value.checkpoint_position.restore_uid diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py index c5e7f3cdac..02c3aebda8 100644 --- a/tensorflow/python/training/checkpointable.py +++ b/tensorflow/python/training/checkpointable.py @@ -31,8 +31,8 @@ from tensorflow.python.util import nest # creation (avoiding double assignment when executing eagerly). VARIABLE_VALUE_KEY = "VARIABLE_VALUE" -_CheckpointableReference = collections.namedtuple( - "_CheckpointableReference", +CheckpointableReference = collections.namedtuple( + "CheckpointableReference", [ # The local name for this dependency. "name", @@ -301,14 +301,17 @@ class CheckpointableBase(object): Not __init__, since most objects will forget to call it. """ - if hasattr(self, "_checkpoint_dependencies"): + if hasattr(self, "_unconditional_checkpoint_dependencies"): # __init__ already called. This check means that we don't need # Checkpointable.__init__() in the constructor of every TensorFlow object. return - # A list of _CheckpointableReference objects. - self._checkpoint_dependencies = [] + # A list of CheckpointableReference objects. Some classes implementing + # `Checkpointable`, notably `Optimizer`s, may override the + # _checkpoint_dependencies property with conditional dependencies + # (e.g. based on the current graph when saving). + self._unconditional_checkpoint_dependencies = [] # Maps names -> Checkpointable objects - self._dependency_names = {} + self._unconditional_dependency_names = {} # Restorations for other Checkpointable objects on which this object may # eventually depend. 
self._deferred_dependencies = {} # local name -> _CheckpointPosition list @@ -320,6 +323,32 @@ class CheckpointableBase(object): "initialization code was run.") self._update_uid = -1 + @property + def _checkpoint_dependencies(self): + """All dependencies of this object. + + May be overridden to include conditional dependencies. + + Returns: + A list of `CheckpointableReference` objects indicating named + `Checkpointable` dependencies which should be saved along with this + object. + """ + return self._unconditional_checkpoint_dependencies + + def _lookup_dependency(self, name): + """Look up a dependency by name. + + May be overridden to include conditional dependencies. + + Args: + name: The local name of the dependency. + Returns: + A `Checkpointable` object, or `None` if no dependency by this name was + found. + """ + return self._unconditional_dependency_names.get(name, None) + def _add_variable_with_custom_getter( self, name, shape=None, dtype=dtypes.float32, initializer=None, getter=None, overwrite=False, @@ -349,7 +378,7 @@ class CheckpointableBase(object): ValueError: If the variable name is not unique. 
""" self._maybe_initialize_checkpointable() - if not overwrite and name in self._dependency_names: + if not overwrite and self._lookup_dependency(name) is not None: raise ValueError( ("A variable named '%s' already exists in this Checkpointable, but " "Checkpointable._add_variable called to create another with " @@ -461,9 +490,10 @@ class CheckpointableBase(object): raise TypeError( ("Checkpointable._track_checkpointable() passed type %s, not a " "Checkpointable.") % (type(checkpointable),)) - new_reference = _CheckpointableReference(name=name, ref=checkpointable) - if (name in self._dependency_names - and self._dependency_names[name] is not checkpointable): + new_reference = CheckpointableReference(name=name, ref=checkpointable) + current_object = self._lookup_dependency(name) + if (current_object is not None + and current_object is not checkpointable): if not overwrite: raise ValueError( ("Called Checkpointable._track_checkpointable() with name='%s', " @@ -471,19 +501,47 @@ class CheckpointableBase(object): "dependency. Names must be unique (or overwrite=True).") % (name,)) # This is a weird thing to do, but we're not going to stop people from # using __setattr__. 
- for index, (old_name, _) in enumerate(self._checkpoint_dependencies): + for index, (old_name, _) in enumerate( + self._unconditional_checkpoint_dependencies): if name == old_name: - self._checkpoint_dependencies[index] = new_reference + self._unconditional_checkpoint_dependencies[index] = new_reference else: - self._checkpoint_dependencies.append(new_reference) + self._unconditional_checkpoint_dependencies.append(new_reference) - self._dependency_names[name] = checkpointable - deferred_dependency_list = self._deferred_dependencies.pop(name, None) - if deferred_dependency_list is not None: - for checkpoint_position in deferred_dependency_list: - checkpoint_position.restore(checkpointable=checkpointable) + self._unconditional_dependency_names[name] = checkpointable + self._handle_deferred_dependencies(name=name, checkpointable=checkpointable) return checkpointable + def _handle_deferred_dependencies(self, name, checkpointable): + """Pop and load any deferred checkpoint restores into `checkpointable`. + + This method does not add a new dependency on `checkpointable`, but it does + check if any outstanding/deferred dependencies have been queued waiting for + this dependency to be added (matched based on `name`). If so, + `checkpointable` and its dependencies are restored. The restorations are + considered fulfilled and so are deleted. + + `_track_checkpointable` is more appropriate for adding a + normal/unconditional dependency, and includes handling for deferred + restorations. This method allows objects such as `Optimizer` to use the same + restoration logic while managing conditional dependencies themselves, by + overriding `_checkpoint_dependencies` and `_lookup_dependency` to change the + object's dependencies based on the context it is saved/restored in (a single + optimizer instance can have state associated with multiple graphs). 
+ + Args: + name: The name of the dependency within this object (`self`), used to + match `checkpointable` with values saved in a checkpoint. + checkpointable: The Checkpointable object to restore (inheriting from + `CheckpointableBase`). + """ + deferred_dependencies_list = self._deferred_dependencies.pop(name, ()) + for checkpoint_position in sorted( + deferred_dependencies_list, + key=lambda restore: restore.checkpoint.restore_uid, + reverse=True): + checkpoint_position.restore(checkpointable) + def _restore_from_checkpoint_position(self, checkpoint_position): """Restore this object and its dependencies (may be deferred).""" # Attempt a breadth-first traversal, since presumably the user has more @@ -519,7 +577,7 @@ class CheckpointableBase(object): child_position = _CheckpointPosition( checkpoint=checkpoint, proto_id=child.node_id) - local_object = self._dependency_names.get(child.local_name, None) + local_object = self._lookup_dependency(child.local_name) if local_object is None: # We don't yet have a dependency registered with this name. Save it # in case we do. diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index 454cc3add5..ba7e087c5a 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -216,7 +216,11 @@ def _get_processor(v): @tf_export("train.Optimizer") -class Optimizer(checkpointable.Checkpointable): +class Optimizer( + # Optimizers inherit from CheckpointableBase rather than Checkpointable + # since they do most of their dependency management themselves (slot + # variables are special-cased, and non-slot variables are keyed to graphs). + checkpointable.CheckpointableBase): """Base class for optimizers. This class defines the API to add Ops to train a model. 
You never use this @@ -645,7 +649,8 @@ class Optimizer(checkpointable.Checkpointable): def _create_non_slot_variable(self, initial_value, name, colocate_with): """Add an extra variable, not associated with a slot.""" - if context.in_graph_mode(): + in_graph_mode = context.in_graph_mode() + if in_graph_mode: graph = colocate_with.graph else: graph = None @@ -653,12 +658,51 @@ class Optimizer(checkpointable.Checkpointable): key = (name, graph) v = self._non_slot_dict.get(key, None) if v is None: + self._maybe_initialize_checkpointable() with ops.colocate_with(colocate_with): + if not in_graph_mode: + restored_initial_value = self._preload_simple_restoration( + name=name, shape=None) + if restored_initial_value is not None: + initial_value = restored_initial_value v = variable_scope.variable(initial_value, name=name, trainable=False) + # Restore this variable by name if necessary, but don't add a + # Checkpointable dependency. Optimizers return the current graph's + # non-slot variables from _checkpoint_dependencies explicitly rather + # than unconditionally adding dependencies (since there may be multiple + # non-slot variables with the same name in different graphs, trying to + # save all of them would result in errors). + self._handle_deferred_dependencies(name=name, checkpointable=v) self._non_slot_dict[key] = v return v + @property + def _checkpoint_dependencies(self): + """From Checkpointable. 
Gather graph-specific non-slot variables to save.""" + current_graph_non_slot_variables = [] + current_graph_key = ops.get_default_graph()._graph_key # pylint: disable=protected-access + for (name, _), variable_object in sorted(self._non_slot_dict.items(), + # Avoid comparing graphs + key=lambda item: item[0][0]): + if variable_object._graph_key == current_graph_key: # pylint: disable=protected-access + current_graph_non_slot_variables.append( + checkpointable.CheckpointableReference( + name=name, ref=variable_object)) + return (super(Optimizer, self)._checkpoint_dependencies + + current_graph_non_slot_variables) + + def _lookup_dependency(self, name): + """From Checkpointable. Find a non-slot variable in the current graph.""" + unconditional = super(Optimizer, self)._lookup_dependency(name) + if unconditional is not None: + return unconditional + if context.in_graph_mode(): + graph = ops.get_default_graph() + else: + graph = None + return self._get_non_slot_variable(name, graph=graph) + def _get_non_slot_variable(self, name, graph=None): return self._non_slot_dict.get((name, graph), None) diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt index c02e54adfb..16bfbf20d5 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.AdadeltaOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt index 2b619908fc..61cde9181c 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt @@ -2,7 +2,6 @@ path: 
"tensorflow.train.AdagradDAOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt index 2005cf4677..0a998c1afe 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.AdagradOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt index 0a2bae1d90..cc59541525 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.AdamOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt index 847f9ad759..1add3a9021 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.FtrlOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt index 13a58e0608..ef5bbd6ace 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt 
@@ -2,7 +2,6 @@ path: "tensorflow.train.GradientDescentOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt index bfbc2357a3..3d6e87f5eb 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.MomentumOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt index 437efa0a2b..e73861ff7c 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt @@ -1,7 +1,6 @@ path: "tensorflow.train.Optimizer" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt index 72f224605f..301b35b199 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.ProximalAdagradOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt index 316275b1fb..8815befa93 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.ProximalGradientDescentOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt index af50a19861..e9819683ba 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.RMSPropOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt index 6edc516c93..3db96aff87 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.SyncReplicasOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { -- GitLab From 8cd02f550634ea7ae5f75531a49986e099ddf957 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 13:58:18 -0800 Subject: [PATCH 167/311] Fix Markdown syntax of bulleted list. 
PiperOrigin-RevId: 187378753 --- tensorflow/python/ops/distributions/uniform.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/distributions/uniform.py b/tensorflow/python/ops/distributions/uniform.py index 3580af18f2..e0c554442f 100644 --- a/tensorflow/python/ops/distributions/uniform.py +++ b/tensorflow/python/ops/distributions/uniform.py @@ -45,11 +45,12 @@ class Uniform(distribution.Distribution): Z = b - a ``` - where: - * `low = a`, - * `high = b`, - * `Z` is the normalizing constant, and, - * `I[predicate]` is the [indicator function]( + where + + - `low = a`, + - `high = b`, + - `Z` is the normalizing constant, and + - `I[predicate]` is the [indicator function]( https://en.wikipedia.org/wiki/Indicator_function) for `predicate`. The parameters `low` and `high` must be shaped in a way that supports -- GitLab From 9f95084b53303af50d0a13fd9bb40a183af9104a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 14:22:07 -0800 Subject: [PATCH 168/311] Make fuse_op handle loops in the graph The current implementation of fuse_op does not work when there are loops in the tensorflow graph. 
PiperOrigin-RevId: 187382720 --- .../contrib/framework/python/framework/graph_util.py | 7 ++++++- .../contrib/framework/python/framework/graph_util_test.py | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/framework/python/framework/graph_util.py b/tensorflow/contrib/framework/python/framework/graph_util.py index 49eec3a3f1..2703224b1b 100644 --- a/tensorflow/contrib/framework/python/framework/graph_util.py +++ b/tensorflow/contrib/framework/python/framework/graph_util.py @@ -85,14 +85,19 @@ def fuse_op(graph_def, input_nodes, output_nodes, output_dtypes, if n not in reachable_by_input and n not in output_nodes_set: # n is between input and output, i.e., part of the fused op next_to_visit = [n] + visited = set() while next_to_visit: cur_node = next_to_visit[0] + visited.add(cur_node) del next_to_visit[0] if cur_node in reachable_by_input and cur_node not in input_nodes_set: raise TypeError("Node %s uses input %s not in input_nodes." % (n, cur_node)) if cur_node not in input_nodes_set: - next_to_visit += name_to_input_name[cur_node] + next_to_visit += [ + input_node for input_node in name_to_input_name[cur_node] + if input_node not in visited + ] elif n not in reachable_by_input: nodes_post_output.append(n) diff --git a/tensorflow/contrib/framework/python/framework/graph_util_test.py b/tensorflow/contrib/framework/python/framework/graph_util_test.py index b8a6d109e1..812c5fbd8c 100644 --- a/tensorflow/contrib/framework/python/framework/graph_util_test.py +++ b/tensorflow/contrib/framework/python/framework/graph_util_test.py @@ -42,7 +42,8 @@ class GraphUtilTest(test.TestCase): graph_def = graph_pb2.GraphDef() node_a = GetNewNode('A', 'Placeholder', []) node_b = GetNewNode('B', 'Op1', ['A']) - node_c = GetNewNode('C', 'Op1', ['B']) + # A loop in the part that will be fused. 
+ node_c = GetNewNode('C', 'Op1', ['B', 'C']) node_d = GetNewNode('D', 'Op1', ['C']) node_e = GetNewNode('E', 'Op1', ['D']) graph_def.node.extend([node_a, node_b, node_c, node_d, node_e]) -- GitLab From b21969b1305b211cd08f8d628b6a5a0e7a9e16f8 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 28 Feb 2018 14:36:09 -0800 Subject: [PATCH 169/311] [tf.data] Add `num_parallel_reads` argument to `tf.data.TFRecordDataset`. This provides a convenient way to use the `tf.contrib.data.parallel_interleave()` idiom for reading multiple TFRecord files in parallel. In addition, the `filenames` argument to the initializer can now be a `tf.data.Dataset` of strings, which makes it easier to use `TFRecordDataset` with `Dataset.list_files()`. PiperOrigin-RevId: 187384812 --- tensorflow/contrib/data/python/ops/BUILD | 1 + .../contrib/data/python/ops/interleave_ops.py | 97 +--------- .../kernel_tests/reader_dataset_ops_test.py | 36 +++- tensorflow/python/data/ops/BUILD | 1 + tensorflow/python/data/ops/readers.py | 166 +++++++++++++++++- .../tensorflow.data.-t-f-record-dataset.pbtxt | 2 +- 6 files changed, 200 insertions(+), 103 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 789cb9c99a..16fe31675f 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -126,6 +126,7 @@ py_library( "//tensorflow/python:tensor_util", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:readers", "//tensorflow/python/data/util:convert", "//tensorflow/python/data/util:nest", "//tensorflow/python/data/util:sparse", diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index 3124ca1d15..91f19da02d 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -17,101 +17,10 @@ from __future__ import 
absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import convert -from tensorflow.python.data.util import nest -from tensorflow.python.data.util import sparse -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import function -from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.data.ops import readers from tensorflow.python.util import deprecation -class ParallelInterleaveDataset(dataset_ops.Dataset): - """A `Dataset` that maps a function over its input and flattens the result.""" - - def __init__(self, input_dataset, map_func, cycle_length, block_length, - sloppy, buffer_output_elements, prefetch_input_elements): - """See `tf.contrib.data.parallel_interleave()` for details.""" - super(ParallelInterleaveDataset, self).__init__() - self._input_dataset = input_dataset - - @function.Defun(*nest.flatten( - sparse.as_dense_types(input_dataset.output_types, - input_dataset.output_classes))) - def tf_map_func(*args): - """A wrapper for Defun that facilitates shape inference.""" - # Pass in shape information from the input_dataset. 
- dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, - input_dataset.output_classes) - for arg, shape in zip(args, nest.flatten(dense_shapes)): - arg.set_shape(shape) - - nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types, input_dataset.output_shapes, - input_dataset.output_classes) - if dataset_ops._should_unpack_args(nested_args): # pylint: disable=protected-access - dataset = map_func(*nested_args) - else: - dataset = map_func(nested_args) - - if not isinstance(dataset, dataset_ops.Dataset): - raise TypeError("`map_func` must return a `Dataset` object.") - - self._output_classes = dataset.output_classes - self._output_types = dataset.output_types - self._output_shapes = dataset.output_shapes - - return dataset._as_variant_tensor() # pylint: disable=protected-access - - self._map_func = tf_map_func - self._map_func.add_to_graph(ops.get_default_graph()) - - self._cycle_length = ops.convert_to_tensor( - cycle_length, dtype=dtypes.int64, name="cycle_length") - self._block_length = ops.convert_to_tensor( - block_length, dtype=dtypes.int64, name="block_length") - self._sloppy = ops.convert_to_tensor( - sloppy, dtype=dtypes.bool, name="sloppy") - self._buffer_output_elements = convert.optional_param_to_tensor( - "buffer_output_elements", - buffer_output_elements, - argument_default=2 * block_length) - self._prefetch_input_elements = convert.optional_param_to_tensor( - "prefetch_input_elements", - prefetch_input_elements, - argument_default=2 * cycle_length) - - def _as_variant_tensor(self): - return gen_dataset_ops.parallel_interleave_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - self._map_func.captured_inputs, - self._cycle_length, - self._block_length, - self._sloppy, - self._buffer_output_elements, - self._prefetch_input_elements, - f=self._map_func, - output_types=nest.flatten( - 
sparse.as_dense_types(self.output_types, self.output_classes)), - output_shapes=nest.flatten( - sparse.as_dense_shapes(self.output_shapes, self.output_classes))) - - @property - def output_classes(self): - return self._output_classes - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types - - def parallel_interleave(map_func, cycle_length, block_length=1, @@ -162,7 +71,7 @@ def parallel_interleave(map_func, @{tf.data.Dataset.apply}. """ def _apply_fn(dataset): - return ParallelInterleaveDataset( + return readers.ParallelInterleaveDataset( dataset, map_func, cycle_length, block_length, sloppy, buffer_output_elements, prefetch_input_elements) @@ -221,7 +130,7 @@ def sloppy_interleave(map_func, cycle_length, block_length=1): @{tf.data.Dataset.apply}. """ def _apply_fn(dataset): - return ParallelInterleaveDataset( + return readers.ParallelInterleaveDataset( dataset, map_func, cycle_length, diff --git a/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py index d7140088c3..1ddedfda4e 100644 --- a/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py @@ -21,6 +21,7 @@ import gzip import os import zlib +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.ops import readers from tensorflow.python.framework import constant_op @@ -736,12 +737,43 @@ class TFRecordDatasetTest(test.TestCase): one_mebibyte = 2**20 d = readers.TFRecordDataset(self.test_filenames, buffer_size=one_mebibyte) iterator = d.make_one_shot_iterator() + next_element = iterator.get_next() with self.test_session() as sess: for j in range(self._num_files): for i in range(self._num_records): - self.assertAllEqual(self._record(j, i), sess.run(iterator.get_next())) + self.assertAllEqual(self._record(j, 
i), sess.run(next_element)) with self.assertRaises(errors.OutOfRangeError): - sess.run(iterator.get_next()) + sess.run(next_element) + + def testReadFromDatasetOfFiles(self): + files = dataset_ops.Dataset.from_tensor_slices(self.test_filenames) + d = readers.TFRecordDataset(files) + iterator = d.make_one_shot_iterator() + next_element = iterator.get_next() + with self.test_session() as sess: + for j in range(self._num_files): + for i in range(self._num_records): + self.assertAllEqual(self._record(j, i), sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testReadTenEpochsFromDatasetOfFilesInParallel(self): + files = dataset_ops.Dataset.from_tensor_slices( + self.test_filenames).repeat(10) + d = readers.TFRecordDataset(files, num_parallel_reads=4) + iterator = d.make_one_shot_iterator() + next_element = iterator.get_next() + expected = [] + actual = [] + with self.test_session() as sess: + for _ in range(10): + for j in range(self._num_files): + for i in range(self._num_records): + expected.append(self._record(j, i)) + actual.append(sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self.assertEqual(sorted(expected), sorted(actual)) if __name__ == "__main__": diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD index dc293562ab..a8f2154db8 100644 --- a/tensorflow/python/data/ops/BUILD +++ b/tensorflow/python/data/ops/BUILD @@ -35,6 +35,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":dataset_ops", + "//tensorflow/python:array_ops", "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py index fa7601741b..6c493d8163 100644 --- a/tensorflow/python/data/ops/readers.py +++ b/tensorflow/python/data/ops/readers.py @@ -17,11 +17,15 @@ from __future__ import absolute_import from __future__ import 
division from __future__ import print_function -from tensorflow.python.data.ops.dataset_ops import Dataset +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import convert +from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.util.tf_export import tf_export @@ -31,7 +35,7 @@ _DEFAULT_READER_BUFFER_SIZE_BYTES = 256 * 1024 # 256 KB @tf_export("data.TextLineDataset") -class TextLineDataset(Dataset): +class TextLineDataset(dataset_ops.Dataset): """A `Dataset` comprising lines from one or more text files.""" def __init__(self, filenames, compression_type=None, buffer_size=None): @@ -73,8 +77,7 @@ class TextLineDataset(Dataset): return dtypes.string -@tf_export("data.TFRecordDataset") -class TFRecordDataset(Dataset): +class _TFRecordDataset(dataset_ops.Dataset): """A `Dataset` comprising records from one or more TFRecord files.""" def __init__(self, filenames, compression_type=None, buffer_size=None): @@ -87,7 +90,7 @@ class TFRecordDataset(Dataset): buffer_size: (Optional.) A `tf.int64` scalar representing the number of bytes in the read buffer. 0 means no buffering. """ - super(TFRecordDataset, self).__init__() + super(_TFRecordDataset, self).__init__() # Force the type to string even if filenames is an empty list. 
self._filenames = ops.convert_to_tensor( filenames, dtypes.string, name="filenames") @@ -118,8 +121,159 @@ class TFRecordDataset(Dataset): return dtypes.string +class ParallelInterleaveDataset(dataset_ops.Dataset): + """A `Dataset` that maps a function over its input and flattens the result.""" + + def __init__(self, input_dataset, map_func, cycle_length, block_length, + sloppy, buffer_output_elements, prefetch_input_elements): + """See `tf.contrib.data.parallel_interleave()` for details.""" + super(ParallelInterleaveDataset, self).__init__() + self._input_dataset = input_dataset + + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) + def tf_map_func(*args): + """A wrapper for Defun that facilitates shape inference.""" + # Pass in shape information from the input_dataset. + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): + arg.set_shape(shape) + + nested_args = nest.pack_sequence_as(input_dataset.output_types, args) + nested_args = sparse.deserialize_sparse_tensors( + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) + if dataset_ops._should_unpack_args(nested_args): # pylint: disable=protected-access + dataset = map_func(*nested_args) + else: + dataset = map_func(nested_args) + + if not isinstance(dataset, dataset_ops.Dataset): + raise TypeError("`map_func` must return a `Dataset` object.") + + self._output_classes = dataset.output_classes + self._output_types = dataset.output_types + self._output_shapes = dataset.output_shapes + + return dataset._as_variant_tensor() # pylint: disable=protected-access + + self._map_func = tf_map_func + self._map_func.add_to_graph(ops.get_default_graph()) + + self._cycle_length = ops.convert_to_tensor( + cycle_length, dtype=dtypes.int64, name="cycle_length") + self._block_length = ops.convert_to_tensor( + 
block_length, dtype=dtypes.int64, name="block_length") + self._sloppy = ops.convert_to_tensor( + sloppy, dtype=dtypes.bool, name="sloppy") + self._buffer_output_elements = convert.optional_param_to_tensor( + "buffer_output_elements", + buffer_output_elements, + argument_default=2 * block_length) + self._prefetch_input_elements = convert.optional_param_to_tensor( + "prefetch_input_elements", + prefetch_input_elements, + argument_default=2 * cycle_length) + + def _as_variant_tensor(self): + return gen_dataset_ops.parallel_interleave_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + self._map_func.captured_inputs, + self._cycle_length, + self._block_length, + self._sloppy, + self._buffer_output_elements, + self._prefetch_input_elements, + f=self._map_func, + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes + + @property + def output_shapes(self): + return self._output_shapes + + @property + def output_types(self): + return self._output_types + + +@tf_export("data.TFRecordDataset") +class TFRecordDataset(dataset_ops.Dataset): + """A `Dataset` comprising records from one or more TFRecord files.""" + + def __init__(self, filenames, compression_type=None, buffer_size=None, + num_parallel_reads=None): + """Creates a `TFRecordDataset` to read for one or more TFRecord files. + + NOTE: The `num_parallel_reads` argument can be used to improve performance + when reading from a remote filesystem. + + Args: + filenames: A `tf.string` tensor or `tf.data.Dataset` containing one or + more filenames. + compression_type: (Optional.) A `tf.string` scalar evaluating to one of + `""` (no compression), `"ZLIB"`, or `"GZIP"`. + buffer_size: (Optional.) A `tf.int64` scalar representing the number of + bytes in the read buffer. 
0 means no buffering. + num_parallel_reads: (Optional.) A `tf.int64` scalar representing the + number of files to read in parallel. Defaults to reading files + sequentially. + + Raises: + TypeError: If any argument does not have the expected type. + ValueError: If any argument does not have the expected shape. + """ + super(TFRecordDataset, self).__init__() + if isinstance(filenames, dataset_ops.Dataset): + if filenames.output_types != dtypes.string: + raise TypeError( + "`filenames` must be a `tf.data.Dataset` of `tf.string` elements.") + if not filenames.output_shapes.is_compatible_with(tensor_shape.scalar()): + raise ValueError( + "`filenames` must be a `tf.data.Dataset` of scalar `tf.string` " + "elements.") + else: + filenames = ops.convert_to_tensor(filenames, dtype=dtypes.string) + filenames = array_ops.reshape(filenames, [-1], name="flat_filenames") + filenames = dataset_ops.Dataset.from_tensor_slices(filenames) + + def read_one_file(filename): + return _TFRecordDataset(filename, compression_type, buffer_size) + + if num_parallel_reads is None: + self._impl = filenames.flat_map(read_one_file) + else: + self._impl = ParallelInterleaveDataset( + filenames, read_one_file, cycle_length=num_parallel_reads, + block_length=1, sloppy=False, buffer_output_elements=None, + prefetch_input_elements=None) + + def _as_variant_tensor(self): + return self._impl._as_variant_tensor() # pylint: disable=protected-access + + @property + def output_classes(self): + return self._impl.output_classes + + @property + def output_shapes(self): + return self._impl.output_shapes + + @property + def output_types(self): + return self._impl.output_types + + @tf_export("data.FixedLengthRecordDataset") -class FixedLengthRecordDataset(Dataset): +class FixedLengthRecordDataset(dataset_ops.Dataset): """A `Dataset` of fixed-length records from one or more binary files.""" def __init__(self, diff --git a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt index 9770389e5e..709ec127ce 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt @@ -17,7 +17,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filenames\', \'compression_type\', \'buffer_size\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'filenames\', \'compression_type\', \'buffer_size\', \'num_parallel_reads\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "apply" -- GitLab From 281677dffc41665343d434752df6464fe2b52319 Mon Sep 17 00:00:00 2001 From: Giuseppe Date: Wed, 28 Feb 2018 23:32:19 +0100 Subject: [PATCH 170/311] Fix markdown error in documentation. Newline in the middle of links was preventing their rendering. --- tensorflow/docs_src/install/install_sources.md | 3 +-- tensorflow/docs_src/install/install_windows.md | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 8d83e9f119..acf0af0d9d 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -393,8 +393,7 @@ TensorFlow programs:
Hello, TensorFlow!
-If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with -TensorFlow}. +If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}. If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index dedf485f93..f0a30ee394 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -153,8 +153,7 @@ TensorFlow programs:
Hello, TensorFlow!
-If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with -TensorFlow}. +If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}. If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). -- GitLab From f28e4d6faf94c08464f430f9cd01ef32dde6ad46 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Wed, 28 Feb 2018 14:43:39 -0800 Subject: [PATCH 171/311] Package c_api_experimental.h in binary release distributions. PiperOrigin-RevId: 187385913 --- tensorflow/c/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 5dfb743681..29ed957c9a 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -17,7 +17,10 @@ load( filegroup( name = "headers", - srcs = ["c_api.h"], + srcs = [ + "c_api.h", + "c_api_experimental.h", + ], visibility = ["//tensorflow:__subpackages__"], ) -- GitLab From 91d49c7d98114da4e4647c62d9f9b69119296b69 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 28 Feb 2018 14:50:02 -0800 Subject: [PATCH 172/311] Removing underscore prefixes from hidden generated Python functions. 
PiperOrigin-RevId: 187386941 --- tensorflow/compiler/tests/binary_ops_test.py | 50 +++++------ tensorflow/compiler/tests/concat_ops_test.py | 2 +- tensorflow/compiler/tests/image_ops_test.py | 2 +- tensorflow/compiler/tests/lrn_ops_test.py | 8 +- .../compiler/tests/pooling_ops_3d_test.py | 18 ++-- tensorflow/compiler/tests/pooling_ops_test.py | 10 +-- .../compiler/tests/spacetobatch_op_test.py | 4 +- tensorflow/compiler/tests/stack_ops_test.py | 46 +++++----- .../compiler/tests/tensor_array_ops_test.py | 2 +- tensorflow/contrib/lookup/lookup_ops.py | 38 +++----- tensorflow/python/__init__.py | 4 - .../python/debug/lib/debug_gradients.py | 9 +- tensorflow/python/eager/benchmarks_test.py | 3 +- tensorflow/python/eager/ops_test.py | 8 +- .../python/eager/python_eager_op_gen.cc | 25 ++++-- tensorflow/python/framework/function_test.py | 4 +- .../python/framework/graph_util_test.py | 14 +-- tensorflow/python/framework/ops_test.py | 2 +- tensorflow/python/framework/python_op_gen.cc | 56 ++++++++++-- .../python/framework/python_op_gen_internal.h | 3 + .../python/framework/tensor_util_test.py | 2 +- .../python/grappler/layout_optimizer_test.py | 10 +-- .../python/kernel_tests/array_ops_test.py | 2 +- .../kernel_tests/batchtospace_op_test.py | 2 +- .../python/kernel_tests/bcast_ops_test.py | 4 +- .../kernel_tests/checkpoint_ops_test.py | 34 ++++---- .../python/kernel_tests/concat_op_test.py | 22 ++--- .../kernel_tests/control_flow_ops_py_test.py | 10 +-- .../python/kernel_tests/cwise_ops_test.py | 10 +-- .../kernel_tests/determinant_op_test.py | 2 +- .../fractional_avg_pool_op_test.py | 10 +-- .../fractional_max_pool_op_test.py | 28 +++--- .../matrix_exponential_op_test.py | 12 +-- .../kernel_tests/matrix_logarithm_op_test.py | 14 +-- .../python/kernel_tests/pooling_ops_test.py | 86 +++++++++---------- .../kernel_tests/save_restore_ops_test.py | 7 +- tensorflow/python/kernel_tests/scalar_test.py | 4 +- .../kernel_tests/spacetobatch_op_test.py | 4 +- 
.../kernel_tests/sparse_xent_op_test.py | 13 +-- .../python/kernel_tests/stack_ops_test.py | 82 +++++++++--------- .../kernel_tests/tensor_array_ops_test.py | 2 +- .../python/kernel_tests/unique_op_test.py | 6 +- .../python/kernel_tests/variable_ops_test.py | 24 +++--- .../python/kernel_tests/variables_test.py | 2 +- .../python/kernel_tests/xent_op_test.py | 12 +-- .../python/ops/accumulate_n_benchmark.py | 7 +- tensorflow/python/ops/array_grad.py | 16 +--- tensorflow/python/ops/array_ops.py | 77 ++++++++--------- tensorflow/python/ops/batch_norm_benchmark.py | 5 +- .../python/ops/candidate_sampling_ops.py | 12 +-- tensorflow/python/ops/control_flow_grad.py | 1 - tensorflow/python/ops/control_flow_ops.py | 46 ++++------ tensorflow/python/ops/ctc_ops.py | 6 +- tensorflow/python/ops/data_flow_ops.py | 42 ++++----- tensorflow/python/ops/functional_ops.py | 2 +- tensorflow/python/ops/gradients_impl.py | 2 +- tensorflow/python/ops/histogram_ops.py | 4 +- tensorflow/python/ops/image_grad.py | 12 +-- tensorflow/python/ops/image_ops_impl.py | 12 +-- tensorflow/python/ops/io_ops.py | 75 ++++++++-------- tensorflow/python/ops/linalg/linalg_impl.py | 8 +- tensorflow/python/ops/linalg_ops.py | 15 ++-- tensorflow/python/ops/logging_ops.py | 15 ++-- tensorflow/python/ops/lookup_ops.py | 20 ++--- tensorflow/python/ops/math_grad.py | 59 +++++-------- tensorflow/python/ops/math_ops.py | 80 +++++++++-------- tensorflow/python/ops/nn_batchnorm_test.py | 3 +- tensorflow/python/ops/nn_grad.py | 64 +++++++------- tensorflow/python/ops/nn_impl.py | 6 +- tensorflow/python/ops/nn_ops.py | 39 ++++----- tensorflow/python/ops/parsing_ops.py | 23 ++--- tensorflow/python/ops/random_ops.py | 16 ++-- tensorflow/python/ops/script_ops.py | 8 +- tensorflow/python/ops/session_ops.py | 13 ++- tensorflow/python/ops/sparse_grad.py | 11 +-- tensorflow/python/ops/sparse_ops.py | 34 ++++---- tensorflow/python/ops/standard_ops.py | 1 - tensorflow/python/ops/state_ops.py | 15 ++-- 
tensorflow/python/ops/string_ops.py | 4 +- tensorflow/python/ops/summary_ops.py | 3 +- tensorflow/python/ops/tensor_array_ops.py | 20 ++--- tensorflow/python/summary/summary.py | 9 +- tensorflow/python/training/checkpoint_ops.py | 6 +- .../training/learning_rate_decay_test.py | 20 ++--- .../python/training/moving_averages_test.py | 2 +- tensorflow/python/training/saver.py | 6 +- .../python/training/saver_test_utils.py | 12 +-- tensorflow/python/user_ops/user_ops.py | 2 +- 88 files changed, 742 insertions(+), 803 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 0e4efaed86..6bcfed7b69 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -71,7 +71,7 @@ class BinaryOpsTest(XLATestCase): expected=np.array([[[[False, True], [True, False]]]], dtype=dtype)) self._testBinary( - gen_math_ops._real_div, + gen_math_ops.real_div, np.array([3, 3, -1.5, -8, 44], dtype=dtype), np.array([2, -2, 7, -4, 0], dtype=dtype), expected=np.array( @@ -108,57 +108,57 @@ class BinaryOpsTest(XLATestCase): [0, np.pi / 4, np.pi / 2, np.pi * 3 / 4, np.pi], dtype=dtype)) self._testBinary( - gen_math_ops._reciprocal_grad, + gen_math_ops.reciprocal_grad, np.array([4, -3, -2, 1], dtype=dtype), np.array([5, -6, 7, -8], dtype=dtype), expected=np.array([-80, 54, -28, 8], dtype=dtype)) self._testBinary( - gen_math_ops._sigmoid_grad, + gen_math_ops.sigmoid_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array([-60, -36, -14, 0], dtype=dtype)) self._testBinary( - gen_math_ops._rsqrt_grad, + gen_math_ops.rsqrt_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array([-160, -81, -28, -4], dtype=dtype)) self._testBinary( - gen_math_ops._sqrt_grad, + gen_math_ops.sqrt_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array([0.625, 1, 1.75, 4], 
dtype=dtype)) self._testBinary( - gen_nn_ops._softplus_grad, + gen_nn_ops.softplus_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array( [3.97322869, 2.99258232, 1.99817801, 0.99966466], dtype=dtype)) self._testBinary( - gen_nn_ops._softsign_grad, + gen_nn_ops.softsign_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array( [0.11111111, 0.06122449, 0.03125, 0.01234568], dtype=dtype)) self._testBinary( - gen_math_ops._tanh_grad, + gen_math_ops.tanh_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array([-75, -48, -21, 0], dtype=dtype)) self._testBinary( - gen_nn_ops._elu_grad, + gen_nn_ops.elu_grad, np.array([1, 2, 3, 4, 5, 6], dtype=dtype), np.array([-.6, -.4, -.2, 0, .2, .4], dtype=dtype), expected=np.array([0.4, 1.2, 2.4, 4, 5, 6], dtype=dtype)) self._testBinary( - gen_nn_ops._selu_grad, + gen_nn_ops.selu_grad, np.array([1, 2, 3, 4, 5, 6], dtype=dtype), np.array([-.6, -.4, -.2, .2, .4, .6], dtype=dtype), expected=np.array( @@ -166,20 +166,20 @@ class BinaryOpsTest(XLATestCase): 4.202803949422, 5.2535049367774, 6.30420592413], dtype=dtype)) self._testBinary( - gen_nn_ops._relu_grad, + gen_nn_ops.relu_grad, np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), np.array([0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype), expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10], dtype=dtype)) self._testBinary( - gen_nn_ops._relu6_grad, + gen_nn_ops.relu6_grad, np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtype), np.array( [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype), expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) self._testBinary( - gen_nn_ops._softmax_cross_entropy_with_logits, + gen_nn_ops.softmax_cross_entropy_with_logits, np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype), np.array([[0.1, 0.2, 0.3, 0.4], [0.4, 0.3, 0.2, 0.1]], dtype=dtype), expected=[ @@ -191,7 +191,7 @@ class 
BinaryOpsTest(XLATestCase): equality_test=self.ListsAreClose) self._testBinary( - gen_nn_ops._sparse_softmax_cross_entropy_with_logits, + gen_nn_ops.sparse_softmax_cross_entropy_with_logits, np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [0.9, 1.0, 1.1, 1.2]], dtype=dtype), np.array([2, 1, 7], dtype=np.int32), @@ -207,7 +207,7 @@ class BinaryOpsTest(XLATestCase): def testIntOps(self): for dtype in self.int_types: self._testBinary( - gen_math_ops._truncate_div, + gen_math_ops.truncate_div, np.array([3, 3, -1, -9, -8], dtype=dtype), np.array([2, -2, 7, 2, -4], dtype=dtype), expected=np.array([1, -1, 0, -4, 2], dtype=dtype)) @@ -369,7 +369,7 @@ class BinaryOpsTest(XLATestCase): expected=np.array([[[[False, True], [True, False]]]], dtype=dtype)) self._testBinary( - gen_math_ops._real_div, + gen_math_ops.real_div, np.array([3, 3j, -1.5j, -8, 2 + 3j, 2 + 4j], dtype=dtype), np.array([2, -2, 7j, -4j, 4 - 6j, 1 + 2j], dtype=dtype), expected=np.array( @@ -378,7 +378,7 @@ class BinaryOpsTest(XLATestCase): # Test inf/nan scenarios. 
self._testBinary( - gen_math_ops._real_div, + gen_math_ops.real_div, np.array([4 + 3j, 4, 3j, -4, -4j, 2 - 3j], dtype=dtype), np.array([0, 0, 0, 0, 0, 0], dtype=dtype), expected=np.array( @@ -418,19 +418,19 @@ class BinaryOpsTest(XLATestCase): lhs = np.array([4 + 2j, -3 - 1j, 2j, 1], dtype=dtype) rhs = np.array([5, -6j, 7 - 3j, -8j], dtype=dtype) self._testBinary( - gen_math_ops._reciprocal_grad, lhs, rhs, expected=-rhs * lhs * lhs) + gen_math_ops.reciprocal_grad, lhs, rhs, expected=-rhs * lhs * lhs) self._testBinary( - gen_math_ops._sigmoid_grad, lhs, rhs, expected=rhs * lhs * (1 - lhs)) + gen_math_ops.sigmoid_grad, lhs, rhs, expected=rhs * lhs * (1 - lhs)) self._testBinary( - gen_math_ops._rsqrt_grad, lhs, rhs, expected=lhs**3 * rhs / -2) + gen_math_ops.rsqrt_grad, lhs, rhs, expected=lhs**3 * rhs / -2) self._testBinary( - gen_math_ops._sqrt_grad, lhs, rhs, expected=rhs / (2 * lhs)) + gen_math_ops.sqrt_grad, lhs, rhs, expected=rhs / (2 * lhs)) self._testBinary( - gen_math_ops._tanh_grad, lhs, rhs, expected=rhs * (1 - lhs * lhs)) + gen_math_ops.tanh_grad, lhs, rhs, expected=rhs * (1 - lhs * lhs)) def testComplexMath(self): for dtype in self.complex_types: @@ -538,7 +538,7 @@ class BinaryOpsTest(XLATestCase): if dtype not in self.complex_types: # floordiv unsupported for complex. 
self._testBinary( - gen_math_ops._floor_div, + gen_math_ops.floor_div, np.array([3, 3, -1, -9, -8], dtype=dtype), np.array([2, -2, 7, 2, -4], dtype=dtype), expected=np.array([1, -2, -1, -5, 2], dtype=dtype)) @@ -554,12 +554,12 @@ class BinaryOpsTest(XLATestCase): def _testRemainder(self, dtype): """Test cases for remainder operators.""" self._testBinary( - gen_math_ops._floor_mod, + gen_math_ops.floor_mod, np.array([3, 3, -1, -8], dtype=dtype), np.array([2, -2, 7, -4], dtype=dtype), expected=np.array([1, -1, 6, 0], dtype=dtype)) self._testBinary( - gen_math_ops._truncate_mod, + gen_math_ops.truncate_mod, np.array([3, 3, -1, -8], dtype=dtype), np.array([2, -2, 7, -4], dtype=dtype), expected=np.array([1, 1, -1, 0], dtype=dtype)) diff --git a/tensorflow/compiler/tests/concat_ops_test.py b/tensorflow/compiler/tests/concat_ops_test.py index 81734082d9..f10973e19f 100644 --- a/tensorflow/compiler/tests/concat_ops_test.py +++ b/tensorflow/compiler/tests/concat_ops_test.py @@ -301,7 +301,7 @@ class ConcatOffsetTest(XLATestCase): s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5], dtypes.int32) s2 = constant_op.constant([2, 20, 5], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1, s2]) + off = gen_array_ops.concat_offset(cdim, [s0, s1, s2]) ans = sess.run(off) self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]]) diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py index 538fa8e8e5..3bc41b7cfd 100644 --- a/tensorflow/compiler/tests/image_ops_test.py +++ b/tensorflow/compiler/tests/image_ops_test.py @@ -426,7 +426,7 @@ class ResizeBilinearTest(XLATestCase): with self.test_session() as sess, self.test_scope(): dtype = dtype or np.float32 grads = array_ops.placeholder(np.float32) - resized = gen_image_ops._resize_bilinear_grad( + resized = gen_image_ops.resize_bilinear_grad( grads, np.zeros([1, input_shape[0], input_shape[1], 1], dtype=dtype), align_corners=True) diff 
--git a/tensorflow/compiler/tests/lrn_ops_test.py b/tensorflow/compiler/tests/lrn_ops_test.py index 5d8d89224d..69bd8f7230 100644 --- a/tensorflow/compiler/tests/lrn_ops_test.py +++ b/tensorflow/compiler/tests/lrn_ops_test.py @@ -115,11 +115,11 @@ class LRNTest(XLATestCase): out_image = constant_op.constant(out_image_vals, shape=shape) out_grads = constant_op.constant(out_grads_vals, shape=shape) with ops.device(CPU_DEVICE): - expected = gen_nn_ops._lrn_grad(out_grads, in_image, out_image, - depth_radius, bias, alpha, beta) + expected = gen_nn_ops.lrn_grad(out_grads, in_image, out_image, + depth_radius, bias, alpha, beta) with self.test_scope(): - actual = gen_nn_ops._lrn_grad(out_grads, in_image, out_image, - depth_radius, bias, alpha, beta) + actual = gen_nn_ops.lrn_grad(out_grads, in_image, out_image, + depth_radius, bias, alpha, beta) expected_val = expected.eval() actual_val = actual.eval() self.assertAllClose(actual_val, expected_val, rtol=1e-3) diff --git a/tensorflow/compiler/tests/pooling_ops_3d_test.py b/tensorflow/compiler/tests/pooling_ops_3d_test.py index eb48fe555a..4eed903963 100644 --- a/tensorflow/compiler/tests/pooling_ops_3d_test.py +++ b/tensorflow/compiler/tests/pooling_ops_3d_test.py @@ -33,7 +33,7 @@ from tensorflow.python.platform import test # MaxPoolGrad. def _AvgPoolGrad(inputs, outputs, output_gradients, ksize, strides, padding): del outputs # Unused by average-pooling gradients. 
- return gen_nn_ops._avg_pool3d_grad( + return gen_nn_ops.avg_pool3d_grad( inputs.get_shape().as_list(), output_gradients, ksize=ksize, @@ -263,7 +263,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradValidPadding1_1_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[1, 3, 3, 3, 1], ksize=[1, 1, 1], strides=[1, 1, 1], @@ -272,7 +272,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradValidPadding2_1_6_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 3, 3, 6, 3], ksize=[2, 2, 2], strides=[1, 1, 1], @@ -281,7 +281,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradValidPadding2_1_7_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 3, 5, 7, 3], ksize=[2, 2, 2], strides=[1, 1, 1], @@ -290,7 +290,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradValidPadding2_2_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 2, 2, 2, 3], ksize=[2, 2, 2], strides=[2, 2, 2], @@ -299,7 +299,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradSamePadding1_1_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 3, 2, 4, 1], ksize=[1, 1, 1], strides=[1, 1, 1], @@ -308,7 +308,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradSamePadding2_1_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 3, 2, 4, 1], ksize=[2, 2, 2], strides=[1, 1, 1], @@ -317,7 +317,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradSamePadding2_2_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 5, 2, 4, 3], ksize=[2, 2, 2], strides=[2, 2, 2], 
@@ -326,7 +326,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradSamePadding3_1_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[1, 3, 3, 7, 1], ksize=[3, 3, 3], strides=[1, 1, 1], diff --git a/tensorflow/compiler/tests/pooling_ops_test.py b/tensorflow/compiler/tests/pooling_ops_test.py index 7c19a99c4e..e0e85295fe 100644 --- a/tensorflow/compiler/tests/pooling_ops_test.py +++ b/tensorflow/compiler/tests/pooling_ops_test.py @@ -459,7 +459,7 @@ class PoolGradTest(XLATestCase): padding="SAME") def testMaxPool(self): - self._TestPooling(nn_ops.max_pool, gen_nn_ops._max_pool_grad) + self._TestPooling(nn_ops.max_pool, gen_nn_ops.max_pool_grad) def testAvgPool(self): # Wrapper around AvgPoolGrad that ignores extra arguments needed by @@ -467,7 +467,7 @@ class PoolGradTest(XLATestCase): def AvgPoolGrad(inputs, outputs, output_gradients, ksize, strides, padding, data_format): del outputs # Unused by average-pooling gradients. 
- return gen_nn_ops._avg_pool_grad( + return gen_nn_ops.avg_pool_grad( inputs.get_shape().as_list(), output_gradients, ksize=ksize, @@ -483,7 +483,7 @@ class PoolGradTest(XLATestCase): def testMaxPoolKernelSmallerThanStrideValid(self): self._VerifyValues( nn_ops.max_pool, - gen_nn_ops._max_pool_grad, + gen_nn_ops.max_pool_grad, input_sizes=[1, 7, 7, 1], ksize=[1, 2, 2, 1], strides=[1, 3, 3, 1], @@ -492,7 +492,7 @@ class PoolGradTest(XLATestCase): def testMaxPoolKernelSmallerThanStrideSame(self): self._VerifyValues( nn_ops.max_pool, - gen_nn_ops._max_pool_grad, + gen_nn_ops.max_pool_grad, input_sizes=[1, 3, 3, 1], ksize=[1, 1, 1, 1], strides=[1, 2, 2, 1], @@ -500,7 +500,7 @@ class PoolGradTest(XLATestCase): self._VerifyValues( nn_ops.max_pool, - gen_nn_ops._max_pool_grad, + gen_nn_ops.max_pool_grad, input_sizes=[1, 4, 4, 1], ksize=[1, 1, 1, 1], strides=[1, 2, 2, 1], diff --git a/tensorflow/compiler/tests/spacetobatch_op_test.py b/tensorflow/compiler/tests/spacetobatch_op_test.py index c013f4b50a..92518aadc4 100644 --- a/tensorflow/compiler/tests/spacetobatch_op_test.py +++ b/tensorflow/compiler/tests/spacetobatch_op_test.py @@ -75,11 +75,11 @@ class SpaceToBatchTest(XLATestCase): for dtype in self.float_types: # outputs = space_to_batch(inputs) placeholder = array_ops.placeholder(dtype) - x_tf = gen_array_ops._space_to_batch( + x_tf = gen_array_ops.space_to_batch( placeholder, paddings, block_size=block_size) self.assertAllEqual(sess.run(x_tf, {placeholder: inputs}), outputs) # inputs = batch_to_space(outputs) - x_tf = gen_array_ops._batch_to_space( + x_tf = gen_array_ops.batch_to_space( placeholder, paddings, block_size=block_size) self.assertAllEqual(sess.run(x_tf, {placeholder: outputs}), inputs) diff --git a/tensorflow/compiler/tests/stack_ops_test.py b/tensorflow/compiler/tests/stack_ops_test.py index 2b9c227973..94342f9567 100644 --- a/tensorflow/compiler/tests/stack_ops_test.py +++ b/tensorflow/compiler/tests/stack_ops_test.py @@ -34,33 +34,33 @@ class 
StackOpTest(XLATestCase): with self.test_session(), self.test_scope(): size = array_ops.placeholder(dtypes.int32) v = array_ops.placeholder(dtypes.float32) - h = gen_data_flow_ops._stack_v2(size, dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, v) + h = gen_data_flow_ops.stack_v2(size, dtypes.float32, stack_name="foo") + c = gen_data_flow_ops.stack_push_v2(h, v) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop_v2(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop_v2(h, dtypes.float32) self.assertAllClose([[4.0, 5.0]], c1.eval({size: 5, v: [[4.0, 5.0]]})) def testStackPushPopSwap(self): with self.test_session(), self.test_scope(): a = np.arange(2000) x = array_ops.placeholder(dtypes.float32) - h = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, x, swap_memory=True) + h = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo") + c = gen_data_flow_ops.stack_push_v2(h, x, swap_memory=True) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop_v2(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop_v2(h, dtypes.float32) self.assertAllClose(a, c1.eval({x: a})) def testMultiStack(self): with self.test_session(), self.test_scope(): v = array_ops.placeholder(dtypes.float32) - h1 = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push_v2(h1, v) + h1 = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo") + c1 = gen_data_flow_ops.stack_push_v2(h1, v) with ops.control_dependencies([c1]): - c1 = gen_data_flow_ops._stack_pop_v2(h1, dtypes.float32) - h2 = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="bar") - c2 = gen_data_flow_ops._stack_push_v2(h2, 5.0) + c1 = gen_data_flow_ops.stack_pop_v2(h1, dtypes.float32) + h2 = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="bar") + c2 = gen_data_flow_ops.stack_push_v2(h2, 5.0) with ops.control_dependencies([c2]): - c2 = 
gen_data_flow_ops._stack_pop_v2(h2, dtypes.float32) + c2 = gen_data_flow_ops.stack_pop_v2(h2, dtypes.float32) r = c1 + c2 self.assertAllClose(9.0, r.eval({v: 4.0})) @@ -69,15 +69,15 @@ class StackOpTest(XLATestCase): with self.test_session() as sess, self.test_scope(): v1 = array_ops.placeholder(dtypes.float32) v2 = array_ops.placeholder(dtypes.float32) - h1 = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="foo") - h2 = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="foo") + h1 = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo") + h2 = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push_v2(h1, v1) + c1 = gen_data_flow_ops.stack_push_v2(h1, v1) with ops.control_dependencies([c1]): - c2 = gen_data_flow_ops._stack_push_v2(h2, v2) + c2 = gen_data_flow_ops.stack_push_v2(h2, v2) with ops.control_dependencies([c2]): - pop1 = gen_data_flow_ops._stack_pop_v2(h1, dtypes.float32) - pop2 = gen_data_flow_ops._stack_pop_v2(h2, dtypes.float32) + pop1 = gen_data_flow_ops.stack_pop_v2(h1, dtypes.float32) + pop2 = gen_data_flow_ops.stack_pop_v2(h2, dtypes.float32) out1, out2 = sess.run([pop1, pop2], {v1: 4.0, v2: 5.0}) self.assertAllClose(out1, 4.0) @@ -86,17 +86,17 @@ class StackOpTest(XLATestCase): def testCloseStack(self): with self.test_session() as sess, self.test_scope(): size = array_ops.placeholder(dtypes.int32) - h = gen_data_flow_ops._stack_v2(size, dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_close_v2(h) + h = gen_data_flow_ops.stack_v2(size, dtypes.float32, stack_name="foo") + c1 = gen_data_flow_ops.stack_close_v2(h) sess.run(c1, {size: 5}) def testPushCloseStack(self): with self.test_session() as sess, self.test_scope(): v = array_ops.placeholder(dtypes.float32) - h = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, v) + h = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo") + c = 
gen_data_flow_ops.stack_push_v2(h, v) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_close_v2(h) + c1 = gen_data_flow_ops.stack_close_v2(h) sess.run(c1, {v: [[4.0, 5.0]]}) diff --git a/tensorflow/compiler/tests/tensor_array_ops_test.py b/tensorflow/compiler/tests/tensor_array_ops_test.py index a62925a181..7624d6e4b2 100644 --- a/tensorflow/compiler/tests/tensor_array_ops_test.py +++ b/tensorflow/compiler/tests/tensor_array_ops_test.py @@ -338,7 +338,7 @@ class TensorArrayTest(xla_test.XLATestCase): w0 = ta.write(0, [[4.0, 5.0]]) # Test reading wrong datatype. - r0_bad = gen_data_flow_ops._tensor_array_read_v3( + r0_bad = gen_data_flow_ops.tensor_array_read_v3( handle=w0.handle, index=0, dtype=dtype2, flow_in=w0.flow) with self.assertRaisesOpError("TensorArray dtype is "): r0_bad.eval() diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py index a430dac4ec..62f1c810fc 100644 --- a/tensorflow/contrib/lookup/lookup_ops.py +++ b/tensorflow/contrib/lookup/lookup_ops.py @@ -341,23 +341,21 @@ class MutableHashTable(LookupInterface): # training to work correctly. Use the node name if no shared_name has been # explicitly specified. 
use_node_name_sharing = checkpoint and shared_name is None - # pylint: disable=protected-access if self._default_value.get_shape().ndims == 0: - self._table_ref = gen_lookup_ops._mutable_hash_table_v2( + self._table_ref = gen_lookup_ops.mutable_hash_table_v2( shared_name=shared_name, use_node_name_sharing=use_node_name_sharing, key_dtype=key_dtype, value_dtype=value_dtype, name=name) else: - self._table_ref = gen_lookup_ops._mutable_hash_table_of_tensors_v2( + self._table_ref = gen_lookup_ops.mutable_hash_table_of_tensors_v2( shared_name=shared_name, use_node_name_sharing=use_node_name_sharing, key_dtype=key_dtype, value_dtype=value_dtype, value_shape=self._default_value.get_shape(), name=name) - # pylint: enable=protected-access super(MutableHashTable, self).__init__(key_dtype, value_dtype, self._table_ref.op.name.split( "/")[-1]) @@ -378,9 +376,7 @@ class MutableHashTable(LookupInterface): with ops.name_scope(name, "%s_Size" % self._name, [self._table_ref]) as name: with ops.colocate_with(self._table_ref): - - # pylint: disable=protected-access - return gen_lookup_ops._lookup_table_size_v2(self._table_ref, name=name) + return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=name) def lookup(self, keys, name=None): """Looks up `keys` in a table, outputs the corresponding values. 
@@ -406,8 +402,7 @@ class MutableHashTable(LookupInterface): with ops.name_scope(name, "%s_lookup_table_find" % self._name, (self._table_ref, keys, self._default_value)) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - values = gen_lookup_ops._lookup_table_find_v2( + values = gen_lookup_ops.lookup_table_find_v2( self._table_ref, keys, self._default_value, name=name) values.set_shape(keys.get_shape().concatenate(self._value_shape)) @@ -437,7 +432,7 @@ class MutableHashTable(LookupInterface): [self._table_ref, keys, values]) as name: with ops.colocate_with(self._table_ref): # pylint: disable=protected-access - op = gen_lookup_ops._lookup_table_insert_v2( + op = gen_lookup_ops.lookup_table_insert_v2( self._table_ref, keys, values, name=name) return op @@ -454,8 +449,7 @@ class MutableHashTable(LookupInterface): with ops.name_scope(name, "%s_lookup_table_export_values" % self._name, [self._table_ref]) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - exported_keys, exported_values = gen_lookup_ops._lookup_table_export_v2( + exported_keys, exported_values = gen_lookup_ops.lookup_table_export_v2( self._table_ref, self._key_dtype, self._value_dtype, name=name) exported_values.set_shape(exported_keys.get_shape().concatenate( @@ -477,7 +471,7 @@ class MutableHashTable(LookupInterface): def restore(self, restored_tensors, unused_restored_shapes): # pylint: disable=protected-access with ops.colocate_with(self.op._table_ref): - return gen_lookup_ops._lookup_table_import_v2( + return gen_lookup_ops.lookup_table_import_v2( self.op._table_ref, restored_tensors[0], restored_tensors[1]) @@ -551,8 +545,7 @@ class MutableDenseHashTable(LookupInterface): # explicitly specified. 
use_node_name_sharing = checkpoint and shared_name is None empty_key = ops.convert_to_tensor(empty_key, dtype=key_dtype) - # pylint: disable=protected-access - self._table_ref = gen_lookup_ops._mutable_dense_hash_table_v2( + self._table_ref = gen_lookup_ops.mutable_dense_hash_table_v2( empty_key=empty_key, shared_name=shared_name, use_node_name_sharing=use_node_name_sharing, @@ -560,7 +553,6 @@ class MutableDenseHashTable(LookupInterface): value_shape=self._value_shape, initial_num_buckets=initial_num_buckets, name=name) - # pylint: enable=protected-access super(MutableDenseHashTable, self).__init__( key_dtype, value_dtype, self._table_ref.op.name.split("/")[-1]) @@ -580,8 +572,7 @@ class MutableDenseHashTable(LookupInterface): with ops.name_scope(name, "%s_Size" % self._name, [self._table_ref]) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - return gen_lookup_ops._lookup_table_size_v2(self._table_ref, name=name) + return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=name) def lookup(self, keys, name=None): """Looks up `keys` in a table, outputs the corresponding values. 
@@ -607,8 +598,7 @@ class MutableDenseHashTable(LookupInterface): with ops.name_scope(name, "%s_lookup_table_find" % self._name, [self._table_ref, keys]) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - values = gen_lookup_ops._lookup_table_find_v2( + values = gen_lookup_ops.lookup_table_find_v2( self._table_ref, keys, self._default_value, name=name) if keys.get_shape().ndims is not None and keys.get_shape().ndims > 0: @@ -640,8 +630,7 @@ class MutableDenseHashTable(LookupInterface): with ops.name_scope(name, "%s_lookup_table_insert" % self._name, [self._table_ref, keys, values]) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - op = gen_lookup_ops._lookup_table_insert_v2( + op = gen_lookup_ops.lookup_table_insert_v2( self._table_ref, keys, values, name=name) return op @@ -658,8 +647,7 @@ class MutableDenseHashTable(LookupInterface): with ops.name_scope(name, "%s_lookup_table_export_values" % self._name, [self._table_ref]) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - exported_keys, exported_values = gen_lookup_ops._lookup_table_export_v2( + exported_keys, exported_values = gen_lookup_ops.lookup_table_export_v2( self._table_ref, self._key_dtype, self._value_dtype, name=name) exported_values.set_shape(exported_keys.get_shape().concatenate( @@ -681,5 +669,5 @@ class MutableDenseHashTable(LookupInterface): def restore(self, restored_tensors, unused_restored_shapes): # pylint: disable=protected-access with ops.colocate_with(self.op._table_ref): - return gen_lookup_ops._lookup_table_import_v2( + return gen_lookup_ops.lookup_table_import_v2( self.op._table_ref, restored_tensors[0], restored_tensors[1]) diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index 02ed5517ca..d6715fa522 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -198,13 +198,9 @@ tf_export('TensorInfo')(TensorInfo) 
_allowed_symbols.extend([ 'arg_max', 'arg_min', - 'mul', # use tf.multiply instead. - 'neg', # use tf.negative instead. - 'sub', # use tf.subtract instead. 'create_partitioned_variables', 'deserialize_many_sparse', 'lin_space', - 'list_diff', # Use tf.listdiff instead. 'listdiff', # Use tf.listdiff instead. 'parse_single_sequence_example', 'serialize_many_sparse', diff --git a/tensorflow/python/debug/lib/debug_gradients.py b/tensorflow/python/debug/lib/debug_gradients.py index 16f51a4b32..589a13db7f 100644 --- a/tensorflow/python/debug/lib/debug_gradients.py +++ b/tensorflow/python/debug/lib/debug_gradients.py @@ -156,11 +156,12 @@ class GradientsDebugger(object): # TODO(cais): Implement value_stack. grad_debug_op_name = _tensor_to_grad_debug_op_name(input_tensor, self._uuid) # pylint: disable=protected-access - identity_op = (gen_array_ops._debug_gradient_ref_identity - if input_tensor.dtype._is_ref_dtype - else gen_array_ops._debug_gradient_identity) - debug_grad_identity = identity_op(input_tensor, name=grad_debug_op_name) + identity_op = ( + gen_array_ops.debug_gradient_ref_identity + if input_tensor.dtype._is_ref_dtype else + gen_array_ops.debug_gradient_identity) # pylint: enable=protected-access + debug_grad_identity = identity_op(input_tensor, name=grad_debug_op_name) assert debug_grad_identity.dtype == input_tensor.dtype if debug_grad_identity.op.name != grad_debug_op_name: raise ValueError( diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index b56cbe80a7..228ff62b20 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -243,7 +243,8 @@ class MicroBenchmarks(test.Benchmark): def _benchmark_gen_math_ops_matmul(self, m, transpose_b, num_iters): def func(): - gen_math_ops._mat_mul(m, m, transpose_b=transpose_b) + gen_math_ops.mat_mul(m, m, transpose_b=transpose_b) + self._run(func, num_iters) def _benchmark_tfe_py_fastpath_execute_matmul(self, m, 
transpose_b, diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index 553571d267..f70c7544d6 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -131,8 +131,12 @@ class OpsTest(test_util.TensorFlowTestCase): dtype=dtypes.int64) values = constant_op.constant([2, 3, 5, 7, 11]) shape = constant_op.constant([2, 7], dtype=dtypes.int64) - result = sparse_ops.gen_sparse_ops._sparse_split( # pylint: disable=protected-access - split_dim, indices, values, shape, num_split=2) + result = sparse_ops.gen_sparse_ops.sparse_split( + split_dim, + indices, + values, + shape, + num_split=2) output_indices, output_values, output_shape = result self.assertEqual(2, len(output_indices)) self.assertEqual(2, len(output_values)) diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index 554e29c7e0..3de7445a50 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -955,10 +955,10 @@ from tensorflow.python.util.tf_export import tf_export if (api_def->visibility() == ApiDef::SKIP) { continue; } - // An op is hidden if either its ApiDef visibility is HIDDEN // or it is in the hidden_ops list. bool is_hidden = api_def->visibility() == ApiDef::HIDDEN; + bool hidden_by_api_def = is_hidden; if (!is_hidden) { for (const string& hidden : hidden_ops) { if (op_def.name() == hidden) { @@ -971,13 +971,22 @@ from tensorflow.python.util.tf_export import tf_export string function_name; python_op_gen_internal::GenerateLowerCaseOpName(op_def.name(), &function_name); - if (is_hidden) function_name = strings::StrCat("_", function_name); - - // When users create custom python wrappers, they may link in the - // default op registry by accident, and because they can't - // enumerate all 'hidden' symbols, this guard is to prevent - // instantiating a python reserved word in their wrapper. 
- if (python_op_gen_internal::IsPythonReserved(function_name)) { + bool is_reserved = python_op_gen_internal::IsPythonReserved(function_name); + + // Prefix an op with underscore if the op is listed in hidden_ops or + // name is reserved or it is of the exceptions in IsOpWithUnderscorePrefix. + // Do not add underscores to ops set to HIDDEN in ApiDef otherwise. + // TODO(annarev): don't prefix with underscores even if op is in hidden_ops. + if (is_hidden) { + if (!hidden_by_api_def || is_reserved || + python_op_gen_internal::IsOpWithUnderscorePrefix(function_name)) { + function_name = strings::StrCat("_", function_name); + } + } else if (is_reserved) { + // When users create custom python wrappers, they may link in the + // default op registry by accident, and because they can't + // enumerate all 'hidden' symbols, this guard is to prevent + // instantiating a python reserved word in their wrapper. continue; } diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 52052ba77d..65ca801cbe 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -193,7 +193,7 @@ class FunctionTest(test.TestCase): @function.Defun(dtypes.float32, dtypes.float32) def XSquarePlusOneGrad(x, dy): - dx = functional_ops._symbolic_gradient( + dx = functional_ops.symbolic_gradient( input=[x, dy], Tout=[dtypes.float32], f="XSquarePlusOneFn", name="dx") return dx @@ -295,7 +295,7 @@ class FunctionTest(test.TestCase): # gradient function is (x, y, dz) -> (dx, dy). dx's shape # should be the same as x's; and dy's shape should be the same # as y's. 
- dx, dy = functional_ops._symbolic_gradient( + dx, dy = functional_ops.symbolic_gradient( input=[x, y, dz], Tout=[dtypes.float32] * 2, f="Foo") self.assertEqual(x.get_shape(), dx.get_shape()) self.assertEqual(y.get_shape(), dy.get_shape()) diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py index 0421837d49..1cdd738198 100644 --- a/tensorflow/python/framework/graph_util_test.py +++ b/tensorflow/python/framework/graph_util_test.py @@ -47,46 +47,46 @@ class DeviceFunctionsTest(test.TestCase): def testTwoDeviceFunctions(self): with ops.Graph().as_default() as g: - var_0 = gen_state_ops._variable( + var_0 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_0", container="", shared_name="") with g.device(test_device_func_pin_variable_to_cpu): - var_1 = gen_state_ops._variable( + var_1 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_1", container="", shared_name="") - var_2 = gen_state_ops._variable( + var_2 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_2", container="", shared_name="") - var_3 = gen_state_ops._variable( + var_3 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_3", container="", shared_name="") with g.device(test_device_func_pin_variable_to_cpu): - var_4 = gen_state_ops._variable( + var_4 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_4", container="", shared_name="") with g.device("/device:GPU:0"): - var_5 = gen_state_ops._variable( + var_5 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_5", container="", shared_name="") - var_6 = gen_state_ops._variable( + var_6 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_6", diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 1f2dfb8d43..55576f0e88 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -2892,7 +2892,7 @@ 
class OutputTypesTest(test_util.TensorFlowTestCase): with g.as_default(): x = constant_op.constant([1, 1, 2, 4, 4, 4, 7, 8, 8], dtype=dtypes.double) - y, _ = gen_array_ops._unique(x) + y, _ = gen_array_ops.unique(x) self.assertEqual([types_pb2.DT_DOUBLE, types_pb2.DT_INT32], y.op._output_types) # pylint: disable=protected-access diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index c95149d177..4813458f07 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -75,6 +75,38 @@ bool IsPythonReserved(const string& s) { return kPythonReserved->count(s) > 0; } +bool IsOpWithUnderscorePrefix(const string& s) { + static const std::set* const kUnderscoreOps = new std::set( + {// Lowercase built-in functions and types in Python, from: + // [x for x in dir(__builtins__) if x[0].islower()] + // These need to be excluded so they don't conflict with actual built-in + // functions since we use '*' imports. + "abs", "all", "any", "apply", "bin", "bool", "buffer", "bytearray", + "bytes", "callable", "chr", "classmethod", "cmp", "coerce", "compile", + "complex", "copyright", "credits", "delattr", "dict", "dir", "divmod", + "enumerate", "eval", "execfile", "exit", "file", "filter", "float", + "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", + "hex", "id", "input", "int", "intern", "isinstance", "issubclass", + "iter", "len", "license", "list", "locals", "long", "map", "max", + "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", + "print", "property", "quit", "range", "raw_input", "reduce", "reload", + "repr", "reversed", "round", "set", "setattr", "slice", "sorted", + "staticmethod", "str", "sum", "super", "tuple", "type", "unichr", + "unicode", "vars", "xrange", "zip", + // These have the same name as ops defined in Python and might be used + // incorrectly depending on order of '*' imports. 
+ // TODO(annarev): reduce usage of '*' imports and remove these from the + // list. + "fused_batch_norm", "histogram_fixed_width", "stack", + "batch_norm_with_global_normalization", + // TODO(annarev): replace these ops in the next change. + "add_sparse_to_tensors_map", "add_many_sparse_to_tensors_map", + "broadcast_gradient_args", "concat", "enter", "histogram_summary", + "ref_enter", "ref_identity", "scalar_summary", + "take_many_sparse_from_tensors_map"}); + return kUnderscoreOps->count(s) > 0; +} + string AvoidPythonReserved(const string& s) { if (IsPythonReserved(s)) return strings::StrCat(s, "_"); return s; @@ -816,6 +848,7 @@ from tensorflow.python.util.tf_export import tf_export // An op is hidden if either its ApiDef visibility is HIDDEN // or it is in the hidden_ops list. bool is_hidden = api_def->visibility() == ApiDef::HIDDEN; + bool hidden_by_api_def = is_hidden; if (!is_hidden) { for (const string& hidden : hidden_ops) { if (op_def.name() == hidden) { @@ -828,13 +861,22 @@ from tensorflow.python.util.tf_export import tf_export string function_name; python_op_gen_internal::GenerateLowerCaseOpName(op_def.name(), &function_name); - if (is_hidden) function_name = strings::StrCat("_", function_name); - - // When users create custom python wrappers, they may link in the - // default op registry by accident, and because they can't - // enumerate all 'hidden' symbols, this guard is to prevent - // instantiating a python reserved word in their wrapper. - if (python_op_gen_internal::IsPythonReserved(function_name)) { + bool is_reserved = python_op_gen_internal::IsPythonReserved(function_name); + + // Prefix an op with underscore if the op is listed in hidden_ops or + // name is reserved or it is of the exceptions in IsOpWithUnderscorePrefix. + // Do not add underscores to ops set to HIDDEN in ApiDef otherwise. + // TODO(annarev): don't prefix with underscores even if op is in hidden_ops. 
+ if (is_hidden) { + if (!hidden_by_api_def || is_reserved || + python_op_gen_internal::IsOpWithUnderscorePrefix(function_name)) { + function_name = strings::StrCat("_", function_name); + } + } else if (is_reserved) { + // When users create custom python wrappers, they may link in the + // default op registry by accident, and because they can't + // enumerate all 'hidden' symbols, this guard is to prevent + // instantiating a python reserved word in their wrapper. continue; } diff --git a/tensorflow/python/framework/python_op_gen_internal.h b/tensorflow/python/framework/python_op_gen_internal.h index 4319e5a782..e0cfb05f4b 100644 --- a/tensorflow/python/framework/python_op_gen_internal.h +++ b/tensorflow/python/framework/python_op_gen_internal.h @@ -29,6 +29,9 @@ namespace python_op_gen_internal { // Returns true if s is a Python keyword or built-in. bool IsPythonReserved(const string& s); +// Whether the op should be prefixed with underscore. +bool IsOpWithUnderscorePrefix(const string& s); + // Add a _ to the end of s if necessary to avoid a Python keyword or built-in. 
string AvoidPythonReserved(const string& s); diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py index bea0ee34fd..6b1b3dd40c 100644 --- a/tensorflow/python/framework/tensor_util_test.py +++ b/tensorflow/python/framework/tensor_util_test.py @@ -768,7 +768,7 @@ class ConstantValueTest(test.TestCase): self.assertAllClose(np_val, tensor_util.constant_value(tf_val)) def testUnknown(self): - tf_val = gen_state_ops._variable( + tf_val = gen_state_ops.variable( shape=[3, 4, 7], dtype=dtypes.float32, name="tf_val", diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 0f51501740..5a84b16a23 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -321,7 +321,7 @@ class LayoutOptimizerTest(test.TestCase): conv = _two_layer_model(x) dim = array_ops.placeholder(dtype='int32') sizes = constant_op.constant([50, 10, 4], shape=[3]) - split = gen_array_ops._split_v( + split = gen_array_ops.split_v( value=conv, size_splits=sizes, axis=dim, num_split=3) output = math_ops.reduce_sum(split[0]) @@ -896,7 +896,7 @@ class LayoutOptimizerTest(test.TestCase): add = math_ops.add(conv, conv) mean = math_ops.reduce_mean(conv) condition = math_ops.less(conv, mean) - select = gen_math_ops._select(condition, conv, add) + select = gen_math_ops.select(condition, conv, add) output = array_ops.identity(select) with session.Session(config=_get_config(False)) as sess: @@ -926,7 +926,7 @@ class LayoutOptimizerTest(test.TestCase): conv = _two_layer_model(x) add = math_ops.add(conv, conv) condition = array_ops.placeholder(dtype='bool') - select = gen_math_ops._select(condition, conv, add) + select = gen_math_ops.select(condition, conv, add) output = array_ops.identity(select) condition_val = np.zeros((1, 7, 7, 64)) @@ -957,7 +957,7 @@ class LayoutOptimizerTest(test.TestCase): conv = _two_layer_model(x) add = 
math_ops.add(conv, conv) condition = constant_op.constant(True) - select = gen_math_ops._select(condition, conv, add) + select = gen_math_ops.select(condition, conv, add) output = array_ops.identity(select) with session.Session(config=_get_config(False)) as sess: @@ -1023,7 +1023,7 @@ class LayoutOptimizerTest(test.TestCase): conv = _two_layer_model(x) ksize = constant_op.constant([1, 2, 3, 1], shape=[4]) strides = array_ops.placeholder(dtype='int32', shape=[4]) - max_pool = gen_nn_ops._max_pool_v2(conv, ksize, strides, 'VALID') + max_pool = gen_nn_ops.max_pool_v2(conv, ksize, strides, 'VALID') output = array_ops.identity(max_pool) strides_val = [1, 3, 2, 1] diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 365cf72108..d35f62b186 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -1223,7 +1223,7 @@ class SnapshotOpTest(test_util.TensorFlowTestCase): for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]: with self.test_session(use_gpu=True): x = constant_op.constant([0, 1, 2, 3], dtype=dtype) - y = gen_array_ops._snapshot(x) + y = gen_array_ops.snapshot(x) self.assertAllEqual(y.eval(), [0, 1, 2, 3]) diff --git a/tensorflow/python/kernel_tests/batchtospace_op_test.py b/tensorflow/python/kernel_tests/batchtospace_op_test.py index 0c802476a0..6143cd3baa 100644 --- a/tensorflow/python/kernel_tests/batchtospace_op_test.py +++ b/tensorflow/python/kernel_tests/batchtospace_op_test.py @@ -44,7 +44,7 @@ class CppOpImpl(object): @staticmethod def batch_to_space(*args, **kwargs): - return gen_array_ops._batch_to_space(*args, **kwargs) + return gen_array_ops.batch_to_space(*args, **kwargs) class BatchToSpaceDepthToSpace(test.TestCase, PythonOpImpl): diff --git a/tensorflow/python/kernel_tests/bcast_ops_test.py b/tensorflow/python/kernel_tests/bcast_ops_test.py index 9e51234605..cb46fcb007 100644 --- 
a/tensorflow/python/kernel_tests/bcast_ops_test.py +++ b/tensorflow/python/kernel_tests/bcast_ops_test.py @@ -20,8 +20,8 @@ from __future__ import print_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.ops.gen_array_ops import _broadcast_args from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args +from tensorflow.python.ops.gen_array_ops import broadcast_args from tensorflow.python.platform import test @@ -29,7 +29,7 @@ class BcastOpsTest(test.TestCase): def _GetBroadcastShape(self, xs, ys): with self.test_session() as sess: - return sess.run(_broadcast_args(xs, ys)) + return sess.run(broadcast_args(xs, ys)) def _GetGradientArgs(self, xs, ys): with self.test_session() as sess: diff --git a/tensorflow/python/kernel_tests/checkpoint_ops_test.py b/tensorflow/python/kernel_tests/checkpoint_ops_test.py index a786d0a47e..7f147ba53a 100644 --- a/tensorflow/python/kernel_tests/checkpoint_ops_test.py +++ b/tensorflow/python/kernel_tests/checkpoint_ops_test.py @@ -50,7 +50,7 @@ class GenerateVocabRemappingTest(test.TestCase): def test_generate_remapping_with_no_vocab_changes(self): """Tests where vocab does not change at all.""" - remapping, num_present = gen_checkpoint_ops._generate_vocab_remapping( + remapping, num_present = gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=self.old_vocab_file, old_vocab_file=self.old_vocab_file, num_new_vocab=3, @@ -63,7 +63,7 @@ class GenerateVocabRemappingTest(test.TestCase): def test_generate_remapping_with_shifted_vocab(self): """Tests where vocab is the same, but shifted / ordered differently.""" - remapping, num_present = gen_checkpoint_ops._generate_vocab_remapping( + remapping, num_present = gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=self.new_vocab_file, old_vocab_file=self.old_vocab_file, num_new_vocab=3, @@ -76,7 +76,7 @@ class GenerateVocabRemappingTest(test.TestCase): def 
test_generate_remapping_with_offset(self): """Tests offset and num_new_vocab logic.""" - remapping, num_present = gen_checkpoint_ops._generate_vocab_remapping( + remapping, num_present = gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=self.new_vocab_file, old_vocab_file=self.old_vocab_file, num_new_vocab=1, @@ -89,7 +89,7 @@ class GenerateVocabRemappingTest(test.TestCase): def test_generate_remapping_with_old_vocab_size(self): """Tests where old_vocab_size is specified.""" - remapping, num_present = gen_checkpoint_ops._generate_vocab_remapping( + remapping, num_present = gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=self.new_vocab_file, old_vocab_file=self.old_vocab_file, num_new_vocab=3, @@ -132,7 +132,7 @@ class LoadAndRemapMatrixTest(test.TestCase): # No column remapping, new weight matrix has second row, then first row. row_remapping = [1, 0] - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=row_remapping, @@ -147,7 +147,7 @@ class LoadAndRemapMatrixTest(test.TestCase): # No row remapping, new weight matrix has third col, then first col. row_remapping = list(range(self.old_num_rows)) col_remapping = [2, 0] - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=row_remapping, @@ -162,7 +162,7 @@ class LoadAndRemapMatrixTest(test.TestCase): # Both row and column remappings. 
row_remapping = [1, 0, 4] col_remapping = [1, 15] - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=row_remapping, @@ -177,7 +177,7 @@ class LoadAndRemapMatrixTest(test.TestCase): def test_load_and_remap_with_init(self): """Tests the op's load and remap where there are missing entries.""" init_val = 42 - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=[2, -1, 0], @@ -196,7 +196,7 @@ class LoadAndRemapMatrixTest(test.TestCase): """Tests when all the rows are missing and need to be initialized.""" num_rows = 7 initializing_values = [42] * num_rows * self.old_num_cols - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=[-1] * num_rows, @@ -214,7 +214,7 @@ class LoadAndRemapMatrixTest(test.TestCase): num_rows = 7 num_cols = 4 initializing_values = [42] * num_rows * num_cols - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=[-1] * num_rows, @@ -235,7 +235,7 @@ class LoadAndRemapMatrixTest(test.TestCase): invalid_remapping = [1, 0, 0, 0, 1, 2] # Invalid row remapping. - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=invalid_remapping, @@ -247,7 +247,7 @@ class LoadAndRemapMatrixTest(test.TestCase): remapped_matrix.eval() # Invalid column remapping. 
- remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=list(range(self.old_num_rows)), @@ -260,7 +260,7 @@ class LoadAndRemapMatrixTest(test.TestCase): def test_load_and_remap_incorrect_initializing_values(self): """Tests that errors are raised with incorrect number of init values.""" - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=[2, -1, 0], @@ -275,7 +275,7 @@ class LoadAndRemapMatrixTest(test.TestCase): with self.test_session(), self.assertRaises(errors.InvalidArgumentError): remapped_matrix.eval() - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=[2, -1, 0], @@ -314,7 +314,7 @@ class LoadAndRemapMatrixWithMaxRowsTest(test.TestCase): num_rows, num_cols = np_value.shape # Tests loading the entire tensor (except reversed). - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=ckpt_path, old_tensor_name=old_tensor_name, # Simply reverses the rows of the matrix. @@ -332,7 +332,7 @@ class LoadAndRemapMatrixWithMaxRowsTest(test.TestCase): self.assertGreater(num_rows, 2) prefix_rows = 2 suffix_rows = 3 - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=ckpt_path, old_tensor_name=old_tensor_name, # Reverses the rows of the matrix, then prepends and appends @@ -353,7 +353,7 @@ class LoadAndRemapMatrixWithMaxRowsTest(test.TestCase): # Tests when everything is taken from initializing_values. 
new_rows = 7 initializing_values = [42] * new_rows * num_cols - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=ckpt_path, old_tensor_name=old_tensor_name, # Nothing is loaded from the old tensor. diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py index 127bc6bb20..81c6a4aa6e 100644 --- a/tensorflow/python/kernel_tests/concat_op_test.py +++ b/tensorflow/python/kernel_tests/concat_op_test.py @@ -526,7 +526,7 @@ class ConcatOpTest(test.TestCase): with self.test_session(use_gpu=True): t1 = [] t2 = [] - output = gen_array_ops._concat_v2([t1, t2], 0).eval() + output = gen_array_ops.concat_v2([t1, t2], 0).eval() self.assertFalse(output) # Checks that output is empty def testConcatInvalidAxis(self): @@ -534,20 +534,20 @@ class ConcatOpTest(test.TestCase): with self.test_session(use_gpu=True): t1 = [1] t2 = [2] - gen_array_ops._concat_v2([t1, t2], 1).eval() + gen_array_ops.concat_v2([t1, t2], 1).eval() def testConcatNegativeAxis(self): with self.test_session(use_gpu=True): t1 = [[1, 2, 3], [4, 5, 6]] t2 = [[7, 8, 9], [10, 11, 12]] - c = gen_array_ops._concat_v2([t1, t2], -2) + c = gen_array_ops.concat_v2([t1, t2], -2) self.assertEqual([4, 3], c.get_shape().as_list()) output = c.eval() self.assertAllEqual([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], output) - c = gen_array_ops._concat_v2([t1, t2], -1) + c = gen_array_ops.concat_v2([t1, t2], -1) self.assertEqual([2, 6], c.get_shape().as_list()) output = c.eval() self.assertAllEqual([[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]], output) @@ -615,7 +615,7 @@ class ConcatOffsetTest(test.TestCase): s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5], dtypes.int32) s2 = constant_op.constant([2, 20, 5], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1, s2]) + off = gen_array_ops.concat_offset(cdim, [s0, s1, s2]) ans = sess.run(off) 
self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]]) @@ -624,7 +624,7 @@ class ConcatOffsetTest(test.TestCase): cdim = constant_op.constant(1, dtypes.int32) s0 = constant_op.constant([[2, 3, 5]], dtypes.int32) s1 = constant_op.constant([[2, 7, 5]], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1]) + off = gen_array_ops.concat_offset(cdim, [s0, s1]) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, r"should be a vector"): sess.run(off) @@ -634,7 +634,7 @@ class ConcatOffsetTest(test.TestCase): cdim = constant_op.constant(4, dtypes.int32) s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1]) + off = gen_array_ops.concat_offset(cdim, [s0, s1]) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, r"Concat dim is out of range: 4 vs. 3"): sess.run(off) @@ -644,7 +644,7 @@ class ConcatOffsetTest(test.TestCase): cdim = constant_op.constant(1, dtypes.int32) s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5, 10], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1]) + off = gen_array_ops.concat_offset(cdim, [s0, s1]) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, r"should contain 3 elem"): sess.run(off) @@ -654,7 +654,7 @@ class ConcatOffsetTest(test.TestCase): cdim = constant_op.constant(1, dtypes.int32) s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 10], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1]) + off = gen_array_ops.concat_offset(cdim, [s0, s1]) with self.assertRaisesRegexp( errors_impl.InvalidArgumentError, r"All dimensions except 1 must match. 
Input 1 has shape \[2 7 10\] " @@ -667,7 +667,7 @@ class ConcatOffsetTest(test.TestCase): s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5], dtypes.int32) s2 = constant_op.constant([2, 20, 5], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1, s2]) + off = gen_array_ops.concat_offset(cdim, [s0, s1, s2]) ans = sess.run(off) self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]]) @@ -675,7 +675,7 @@ class ConcatOffsetTest(test.TestCase): s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([1, 3, 5], dtypes.int32) s2 = constant_op.constant([3, 3, 5], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1, s2]) + off = gen_array_ops.concat_offset(cdim, [s0, s1, s2]) ans = sess.run(off) self.assertAllEqual(ans, [[0, 0, 0], [2, 0, 0], [3, 0, 0]]) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 58f38650eb..b429fa5c42 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -552,7 +552,7 @@ class ControlFlowTest(test.TestCase): def testCondRef(self): with self.test_session(): - x = gen_state_ops._variable( + x = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="x", @@ -580,7 +580,7 @@ class ControlFlowTest(test.TestCase): def testUninitializedRefIdentity(self): with self.test_session() as sess: - v = gen_state_ops._variable( + v = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="v", @@ -1620,7 +1620,7 @@ class ControlFlowTest(test.TestCase): def testWhileStack_1(self): with self.test_session(): - s = gen_data_flow_ops._stack_v2(-1, dtypes.int32, stack_name="foo") + s = gen_data_flow_ops.stack_v2(-1, dtypes.int32, stack_name="foo") i = constant_op.constant(0) def c(i): @@ -1629,7 +1629,7 @@ class ControlFlowTest(test.TestCase): def b(i): ni = math_ops.add(i, 1) ni = 
control_flow_ops.with_dependencies( - [gen_data_flow_ops._stack_push_v2(s, i)], ni) + [gen_data_flow_ops.stack_push_v2(s, i)], ni) return ni r = control_flow_ops.while_loop(c, b, [i], parallel_iterations=1) @@ -1641,7 +1641,7 @@ class ControlFlowTest(test.TestCase): def b1(i, x): ni = math_ops.subtract(i, 1) - nx = x + gen_data_flow_ops._stack_pop_v2(s, dtypes.int32) + nx = x + gen_data_flow_ops.stack_pop_v2(s, dtypes.int32) return [ni, nx] _, rx = control_flow_ops.while_loop( diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index 0d9b46c30d..8db0bb6f0d 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -495,11 +495,11 @@ class UnaryOpTest(test.TestCase): dtype_tols = [(np.float32, 5e-4), (np.float64, 1e-6), (np.complex64, 5e-4), (np.complex128, 1e-6)] op_range = [ - (gen_math_ops._reciprocal_grad, [-2, 2]), - (gen_math_ops._rsqrt_grad, [0.1, 3]), - (gen_math_ops._sigmoid_grad, [-2, 2]), - (gen_math_ops._sqrt_grad, [0.1, 3]), - (gen_math_ops._tanh_grad, [-2, 2]), + (gen_math_ops.reciprocal_grad, [-2, 2]), + (gen_math_ops.rsqrt_grad, [0.1, 3]), + (gen_math_ops.sigmoid_grad, [-2, 2]), + (gen_math_ops.sqrt_grad, [0.1, 3]), + (gen_math_ops.tanh_grad, [-2, 2]), ] def rand(dtype): diff --git a/tensorflow/python/kernel_tests/determinant_op_test.py b/tensorflow/python/kernel_tests/determinant_op_test.py index 222038b22e..a52b2c0dc3 100644 --- a/tensorflow/python/kernel_tests/determinant_op_test.py +++ b/tensorflow/python/kernel_tests/determinant_op_test.py @@ -65,7 +65,7 @@ class DeterminantOpTest(test.TestCase): self._compareDeterminantBase(matrix_x, linalg_ops.matrix_determinant(matrix_x)) self._compareLogDeterminantBase( - matrix_x, gen_linalg_ops._log_matrix_determinant(matrix_x)) + matrix_x, gen_linalg_ops.log_matrix_determinant(matrix_x)) def testBasic(self): # 2x2 matrices diff --git 
a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py index feec9934e4..faac7d8365 100644 --- a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py +++ b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py @@ -347,7 +347,7 @@ class FractionalAvgPoolGradTest(test.TestCase): Two types of tests for FractionalAvgPoolGrad. 1) Test fractional_avg_pool_grad() directly. - This type of test relies on gen_nn_ops._avg_pool_grad() returns the + This type of test relies on gen_nn_ops.avg_pool_grad() returns the correct result. For example: * input_tensor_shape = (1, 10, 10, 1) * window_size = (1, 2, 2, 1) @@ -404,13 +404,13 @@ class FractionalAvgPoolGradTest(test.TestCase): num_elements *= dim_size output_backprop = (self._PRNG.rand(num_elements) * 1000).reshape(output_data.shape) - input_backprop_tensor = gen_nn_ops._avg_pool_grad( + input_backprop_tensor = gen_nn_ops.avg_pool_grad( input_tensor.get_shape(), output_backprop, window_size, stride_size, padding) input_backprop = input_backprop_tensor.eval() row_seq = list(range(0, num_rows + 1, row_window_size)) col_seq = list(range(0, num_cols + 1, col_window_size)) - fap_input_backprop_tensor = gen_nn_ops._fractional_avg_pool_grad( + fap_input_backprop_tensor = gen_nn_ops.fractional_avg_pool_grad( input_tensor.get_shape(), output_backprop, row_seq, @@ -443,7 +443,7 @@ class FractionalAvgPoolGradTest(test.TestCase): num_elements *= dim_size output_backprop = (self._PRNG.rand(num_elements) * 1000).reshape(output_data.shape) - input_backprop_tensor = gen_nn_ops._avg_pool_grad( + input_backprop_tensor = gen_nn_ops.avg_pool_grad( input_tensor.get_shape(), output_backprop, window_size, stride_size, padding) input_backprop = input_backprop_tensor.eval() @@ -451,7 +451,7 @@ class FractionalAvgPoolGradTest(test.TestCase): col_seq = list(range(0, num_cols, col_window_size - 1)) row_seq[-1] += 1 col_seq[-1] += 1 - fap_input_backprop_tensor = 
gen_nn_ops._fractional_avg_pool_grad( + fap_input_backprop_tensor = gen_nn_ops.fractional_avg_pool_grad( input_tensor.get_shape(), output_backprop, row_seq, diff --git a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py index 5983ae7759..6477c9ebc4 100644 --- a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py +++ b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py @@ -318,7 +318,7 @@ class FractionalMaxPoolGradTest(test.TestCase): Two types of tests for FractionalMaxPoolGrad. 1) Test fractional_max_pool_grad() directly. - This type of test relies on gen_nn_ops._max_pool_grad() returns the correct + This type of test relies on gen_nn_ops.max_pool_grad() returns the correct result. For example: * input_tensor_shape = (1, 10, 10, 1) * window_size = (1, 2, 2, 1) @@ -384,16 +384,13 @@ class FractionalMaxPoolGradTest(test.TestCase): stride_size, padding) output_data = output_tensor.eval() output_backprop = self._PRNG.randint(100, size=output_data.shape) - input_backprop_tensor = gen_nn_ops._max_pool_grad(input_tensor, - output_tensor, - output_backprop, - window_size, - stride_size, - padding) + input_backprop_tensor = gen_nn_ops.max_pool_grad( + input_tensor, output_tensor, output_backprop, window_size, + stride_size, padding) input_backprop = input_backprop_tensor.eval() row_seq = list(range(0, num_rows + 1, row_window_size)) col_seq = list(range(0, num_cols + 1, col_window_size)) - fmp_input_backprop_tensor = gen_nn_ops._fractional_max_pool_grad( + fmp_input_backprop_tensor = gen_nn_ops.fractional_max_pool_grad( input_tensor, output_tensor, output_backprop, @@ -422,18 +419,15 @@ class FractionalMaxPoolGradTest(test.TestCase): stride_size, padding) output_data = output_tensor.eval() output_backprop = self._PRNG.randint(100, size=output_data.shape) - input_backprop_tensor = gen_nn_ops._max_pool_grad(input_tensor, - output_tensor, - output_backprop, - window_size, - 
stride_size, - padding) + input_backprop_tensor = gen_nn_ops.max_pool_grad( + input_tensor, output_tensor, output_backprop, window_size, + stride_size, padding) input_backprop = input_backprop_tensor.eval() row_seq = list(range(0, num_rows, row_window_size - 1)) col_seq = list(range(0, num_cols, col_window_size - 1)) row_seq[-1] += 1 col_seq[-1] += 1 - fmp_input_backprop_tensor = gen_nn_ops._fractional_max_pool_grad( + fmp_input_backprop_tensor = gen_nn_ops.fractional_max_pool_grad( input_tensor, output_tensor, output_backprop, @@ -591,7 +585,7 @@ class FractionalMaxPoolGradTest(test.TestCase): output_tensor = constant_op.constant( output_data_not_overlapping, shape=output_size) grad = constant_op.constant(output_backprop, shape=output_size) - r = gen_nn_ops._fractional_max_pool_grad( + r = gen_nn_ops.fractional_max_pool_grad( input_tensor, output_tensor, grad, @@ -606,7 +600,7 @@ class FractionalMaxPoolGradTest(test.TestCase): # Test when overlapping is True output_tensor = constant_op.constant( output_data_overlapping, shape=output_size) - r = gen_nn_ops._fractional_max_pool_grad( + r = gen_nn_ops.fractional_max_pool_grad( input_tensor, output_tensor, grad, row_seq, col_seq, overlapping=True) input_backprop_overlapping = r.eval() self.assertShapeEqual( diff --git a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py index 6203a412d7..a0c66c77d8 100644 --- a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py @@ -48,7 +48,7 @@ class ExponentialOpTest(test.TestCase): def _verifyExponential(self, x, np_type): inp = x.astype(np_type) with self.test_session(use_gpu=True): - tf_ans = gen_linalg_ops._matrix_exponential(inp) + tf_ans = gen_linalg_ops.matrix_exponential(inp) if x.size == 0: np_ans = np.empty(x.shape, dtype=np_type) else: @@ -116,13 +116,13 @@ class ExponentialOpTest(test.TestCase): # When the exponential of a 
non-square matrix is attempted we should return # an error with self.assertRaises(ValueError): - gen_linalg_ops._matrix_exponential(np.array([[1., 2., 3.], [3., 4., 5.]])) + gen_linalg_ops.matrix_exponential(np.array([[1., 2., 3.], [3., 4., 5.]])) def testWrongDimensions(self): # The input to the exponential should be at least a 2-dimensional tensor. tensor3 = constant_op.constant([1., 2.]) with self.assertRaises(ValueError): - gen_linalg_ops._matrix_exponential(tensor3) + gen_linalg_ops.matrix_exponential(tensor3) def testEmpty(self): self._verifyExponentialReal(np.empty([0, 2, 2])) @@ -143,8 +143,8 @@ class ExponentialOpTest(test.TestCase): with self.test_session(use_gpu=True) as sess: matrix1 = random_ops.random_normal([5, 5], seed=42) matrix2 = random_ops.random_normal([5, 5], seed=42) - expm1 = gen_linalg_ops._matrix_exponential(matrix1) - expm2 = gen_linalg_ops._matrix_exponential(matrix2) + expm1 = gen_linalg_ops.matrix_exponential(matrix1) + expm2 = gen_linalg_ops.matrix_exponential(matrix2) expm = sess.run([expm1, expm2]) self.assertAllEqual(expm[0], expm[1]) @@ -180,7 +180,7 @@ class MatrixExponentialBenchmark(test.Benchmark): session.Session() as sess, \ ops.device("/cpu:0"): matrix = self._GenerateMatrix(shape) - expm = gen_linalg_ops._matrix_exponential(matrix) + expm = gen_linalg_ops.matrix_exponential(matrix) variables.global_variables_initializer().run() self.run_op_benchmark( sess, diff --git a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py index 18ed59828c..24edc4f59f 100644 --- a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py @@ -39,8 +39,8 @@ class LogarithmOpTest(test.TestCase): inp = x.astype(np_type) with self.test_session(use_gpu=True): # Verify that expm(logm(A)) == A. 
- tf_ans = gen_linalg_ops._matrix_exponential( - gen_linalg_ops._matrix_logarithm(inp)) + tf_ans = gen_linalg_ops.matrix_exponential( + gen_linalg_ops.matrix_logarithm(inp)) out = tf_ans.eval() self.assertAllClose(inp, out, rtol=1e-4, atol=1e-3) @@ -85,14 +85,14 @@ class LogarithmOpTest(test.TestCase): # When the logarithm of a non-square matrix is attempted we should return # an error with self.assertRaises(ValueError): - gen_linalg_ops._matrix_logarithm( + gen_linalg_ops.matrix_logarithm( np.array([[1., 2., 3.], [3., 4., 5.]], dtype=np.complex64)) def testWrongDimensions(self): # The input to the logarithm should be at least a 2-dimensional tensor. tensor3 = constant_op.constant([1., 2.], dtype=dtypes.complex64) with self.assertRaises(ValueError): - gen_linalg_ops._matrix_logarithm(tensor3) + gen_linalg_ops.matrix_logarithm(tensor3) def testEmpty(self): self._verifyLogarithmComplex(np.empty([0, 2, 2], dtype=np.complex64)) @@ -115,8 +115,8 @@ class LogarithmOpTest(test.TestCase): random_ops.random_normal([5, 5], seed=42), dtypes.complex64) matrix2 = math_ops.cast( random_ops.random_normal([5, 5], seed=42), dtypes.complex64) - logm1 = gen_linalg_ops._matrix_logarithm(matrix1) - logm2 = gen_linalg_ops._matrix_logarithm(matrix2) + logm1 = gen_linalg_ops.matrix_logarithm(matrix1) + logm2 = gen_linalg_ops.matrix_logarithm(matrix2) logm = sess.run([logm1, logm2]) self.assertAllEqual(logm[0], logm[1]) @@ -152,7 +152,7 @@ class MatrixLogarithmBenchmark(test.Benchmark): session.Session() as sess, \ ops.device("/cpu:0"): matrix = self._GenerateMatrix(shape) - logm = gen_linalg_ops._matrix_logarithm(matrix) + logm = gen_linalg_ops.matrix_logarithm(matrix) variables.global_variables_initializer().run() self.run_op_benchmark( sess, diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index 4466beeec9..a0ac355b60 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ 
b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -405,7 +405,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 3, 3, 3], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], @@ -427,7 +427,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 2, 3, 3], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], @@ -456,7 +456,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 2, 2, 1], ksize=[1, 1, 2, 1], strides=[1, 1, 1, 1], @@ -485,7 +485,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 4, 4, 1], ksize=[1, 2, 2, 1], strides=[1, 1, 2, 1], @@ -494,7 +494,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu, v2=v2) self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 4, 4, 1], ksize=[1, 2, 2, 1], strides=[1, 2, 1, 1], @@ -519,7 +519,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 4, 4, 4], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], @@ -554,7 +554,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 8, 8, 8], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], @@ -565,7 +565,7 @@ class PoolingTest(test.TestCase): def _testMaxPoolEmptyInput(self, use_gpu): self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[0, 8, 8, 8], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], @@ -600,7 +600,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 
1, 1, 10], ksize=[1, 1, 1, 2], strides=[1, 1, 1, 2], @@ -626,7 +626,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 2, 2, 6], ksize=[1, 1, 1, 3], strides=[1, 1, 1, 3], @@ -648,7 +648,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 7, 7, 1], ksize=[1, 2, 2, 1], strides=[1, 3, 3, 1], @@ -689,7 +689,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 3, 3, 1], ksize=[1, 1, 1, 1], strides=[1, 2, 2, 1], @@ -699,7 +699,7 @@ class PoolingTest(test.TestCase): v2=v2) self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 4, 4, 1], ksize=[1, 1, 1, 1], strides=[1, 2, 2, 1], @@ -764,8 +764,8 @@ class PoolingTest(test.TestCase): _, argmax_op = nn_ops.max_pool_with_argmax(t, ksize, strides, padding) argmax = argmax_op.eval() grad_in = constant_op.constant(tensor_output, shape=output_shape) - out_op = gen_nn_ops._max_pool_grad_with_argmax(t, grad_in, argmax, - ksize, strides, padding) + out_op = gen_nn_ops.max_pool_grad_with_argmax(t, grad_in, argmax, ksize, + strides, padding) gpu_val = out_op.eval() self.assertShapeEqual(gpu_val, out_op) with self.test_session(use_gpu=False): @@ -773,8 +773,8 @@ class PoolingTest(test.TestCase): out_op = nn_ops.max_pool(t, ksize, strides, padding) orig_out = out_op.eval() grad_in = constant_op.constant(tensor_output, shape=output_shape) - out_op = gen_nn_ops._max_pool_grad(t, orig_out, grad_in, ksize, strides, - padding) + out_op = gen_nn_ops.max_pool_grad(t, orig_out, grad_in, ksize, strides, + padding) cpu_val = out_op.eval() self.assertShapeEqual(cpu_val, out_op) # The CPU version accumulates its gradient on fp16, so it's less @@ -793,7 +793,7 @@ class PoolingTest(test.TestCase): _, argmax_op = 
nn_ops.max_pool_with_argmax(t, ksize, strides, padding) argmax = argmax_op.eval() grad_in = constant_op.constant(tensor_input, shape=input_shape) - out_op = gen_nn_ops._max_pool_grad_grad_with_argmax( + out_op = gen_nn_ops.max_pool_grad_grad_with_argmax( t, grad_in, argmax, ksize, strides, padding) gpu_val = out_op.eval() self.assertShapeEqual(gpu_val, out_op) @@ -802,8 +802,8 @@ class PoolingTest(test.TestCase): out_op = nn_ops.max_pool(t, ksize, strides, padding) orig_out = out_op.eval() grad_in = constant_op.constant(tensor_input, shape=input_shape) - out_op = gen_nn_ops._max_pool_grad_grad(t, orig_out, grad_in, ksize, - strides, padding) + out_op = gen_nn_ops.max_pool_grad_grad(t, orig_out, grad_in, ksize, + strides, padding) cpu_val = out_op.eval() self.assertShapeEqual(cpu_val, out_op) # The CPU version accumulates its gradient on fp16, so it's less @@ -842,7 +842,7 @@ class PoolingTest(test.TestCase): t = constant_op.constant(tensor_input, shape=[1, 2, 2, 1]) argmax = constant_op.constant( tensor_argmax, shape=[1, 2, 2, 1], dtype=dtypes.int64) - out_op = gen_nn_ops._max_pool_grad_with_argmax( + out_op = gen_nn_ops.max_pool_grad_with_argmax( orig_in, t, argmax, @@ -865,7 +865,7 @@ class PoolingTest(test.TestCase): t = constant_op.constant(tensor_input, shape=[1, 3, 3, 1]) argmax = constant_op.constant( tensor_argmax, shape=[1, 2, 2, 1], dtype=dtypes.int64) - out_op = gen_nn_ops._max_pool_grad_grad_with_argmax( + out_op = gen_nn_ops.max_pool_grad_grad_with_argmax( orig_in, t, argmax, @@ -1029,7 +1029,7 @@ class PoolingTest(test.TestCase): self.assertLess(err, err_tolerance) def _testMaxPoolGradValidPadding1_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[1, 3, 3, 1], @@ -1043,7 +1043,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradValidPadding2_1_6(self, data_format, 
use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 6, 6, 3], @@ -1057,7 +1057,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradValidPadding2_1_7(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 7, 7, 3], @@ -1071,7 +1071,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradValidPadding1_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[1, 3, 3, 1], @@ -1085,7 +1085,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradValidPadding2_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 2, 2, 3], @@ -1099,7 +1099,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradSamePadding1_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1113,7 +1113,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradSamePadding1_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1127,7 +1127,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradSamePadding2_1(self, data_format, 
use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1141,7 +1141,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradSamePadding2_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1155,7 +1155,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradSamePadding3_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[1, 7, 7, 1], @@ -1199,7 +1199,7 @@ class PoolingTest(test.TestCase): Returns: A Tensor. """ - pool_func = gen_nn_ops.max_pool_grad_v2 if v2 else gen_nn_ops._max_pool_grad + pool_func = gen_nn_ops.max_pool_grad_v2 if v2 else gen_nn_ops.max_pool_grad return pool_func(orig_input, orig_output, grad, [1, window_rows, window_cols, 1], [1, row_stride, col_stride, 1], padding) @@ -1208,7 +1208,7 @@ class PoolingTest(test.TestCase): expected_input_backprop, input_sizes, output_sizes, window_rows, window_cols, row_stride, col_stride, padding, use_gpu, v2): - pool_func = gen_nn_ops._max_pool_v2 if v2 else nn_ops.max_pool + pool_func = gen_nn_ops.max_pool_v2 if v2 else nn_ops.max_pool with self.test_session(use_gpu=use_gpu): input_tensor = constant_op.constant(input_data, shape=input_sizes) output_tensor = pool_func(input_tensor, [1, window_rows, window_cols, 1], @@ -1504,7 +1504,7 @@ class PoolingTest(test.TestCase): self._testMaxPoolGradDirectWithNans2_2() def _testMaxPoolGradGradValidPadding1_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, 
nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[1, 3, 3, 1], @@ -1518,7 +1518,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradValidPadding2_1_6(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 6, 6, 3], @@ -1532,7 +1532,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradValidPadding2_1_7(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 7, 7, 3], @@ -1546,7 +1546,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradValidPadding2_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 2, 2, 3], @@ -1560,7 +1560,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradSamePadding1_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1574,7 +1574,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradSamePadding2_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1588,7 +1588,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradSamePadding2_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, 
nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1602,7 +1602,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradSamePadding3_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[1, 7, 7, 1], @@ -1644,7 +1644,7 @@ class PoolingTest(test.TestCase): Returns: A Tensor. """ - return gen_nn_ops._max_pool_grad_grad( + return gen_nn_ops.max_pool_grad_grad( orig_input, orig_output, grad, [1, window_rows, window_cols, 1], [1, row_stride, col_stride, 1], padding) diff --git a/tensorflow/python/kernel_tests/save_restore_ops_test.py b/tensorflow/python/kernel_tests/save_restore_ops_test.py index 1bdfa9ebd8..cb9aa1e34d 100644 --- a/tensorflow/python/kernel_tests/save_restore_ops_test.py +++ b/tensorflow/python/kernel_tests/save_restore_ops_test.py @@ -31,11 +31,10 @@ class ShardedFileOpsTest(test.TestCase): with session.Session( target="", config=config_pb2.ConfigProto(device_count={"CPU": 2})): self.assertEqual( - gen_io_ops._sharded_filename("foo", 4, 100).eval(), + gen_io_ops.sharded_filename("foo", 4, 100).eval(), b"foo-00004-of-00100") self.assertEqual( - gen_io_ops._sharded_filespec("foo", 100).eval(), - b"foo-?????-of-00100") + gen_io_ops.sharded_filespec("foo", 100).eval(), b"foo-?????-of-00100") class ShapeInferenceTest(test.TestCase): @@ -53,7 +52,7 @@ class ShapeInferenceTest(test.TestCase): [dtypes.float32, dtypes.float32]) def testRestoreSlice(self): - op = gen_io_ops._restore_slice("model", "var", "3 4 0,1:-", dtypes.float32) + op = gen_io_ops.restore_slice("model", "var", "3 4 0,1:-", dtypes.float32) self.assertEqual([1, 4], op.get_shape()) diff --git a/tensorflow/python/kernel_tests/scalar_test.py b/tensorflow/python/kernel_tests/scalar_test.py index 
e65241981e..0d8fd23294 100644 --- a/tensorflow/python/kernel_tests/scalar_test.py +++ b/tensorflow/python/kernel_tests/scalar_test.py @@ -92,11 +92,11 @@ class ScalarTest(test.TestCase): self.check(array_ops.reshape, (7, 1), 'sizes input must be 1-D', [7]) def testShardedFilename(self): - self.check(gen_io_ops._sharded_filename, ('foo', 4, [100]), + self.check(gen_io_ops.sharded_filename, ('foo', 4, [100]), 'must be a scalar', b'foo-00004-of-00100') def testShardedFilespec(self): - self.check(gen_io_ops._sharded_filespec, ('foo', [100]), 'must be a scalar', + self.check(gen_io_ops.sharded_filespec, ('foo', [100]), 'must be a scalar', b'foo-?????-of-00100') def testUnsortedSegmentSum(self): diff --git a/tensorflow/python/kernel_tests/spacetobatch_op_test.py b/tensorflow/python/kernel_tests/spacetobatch_op_test.py index b943dfa4e5..2a9232b6ae 100644 --- a/tensorflow/python/kernel_tests/spacetobatch_op_test.py +++ b/tensorflow/python/kernel_tests/spacetobatch_op_test.py @@ -86,11 +86,11 @@ class CppOpImpl(object): @staticmethod def space_to_batch(*args, **kwargs): - return gen_array_ops._space_to_batch(*args, **kwargs) + return gen_array_ops.space_to_batch(*args, **kwargs) @staticmethod def batch_to_space(*args, **kwargs): - return gen_array_ops._batch_to_space(*args, **kwargs) + return gen_array_ops.batch_to_space(*args, **kwargs) class SpaceToBatchTest(test.TestCase, PythonOpImpl): diff --git a/tensorflow/python/kernel_tests/sparse_xent_op_test.py b/tensorflow/python/kernel_tests/sparse_xent_op_test.py index cd5b711a0e..a841fe83a7 100644 --- a/tensorflow/python/kernel_tests/sparse_xent_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_xent_op_test.py @@ -64,7 +64,7 @@ class SparseXentTest(test.TestCase): def _testXent(self, np_features, np_labels): np_loss, np_backprop = self._npXent(np_features, np_labels) with self.test_session(use_gpu=True) as sess: - loss, backprop = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( + loss, backprop = 
gen_nn_ops.sparse_softmax_cross_entropy_with_logits( np_features, np_labels) tf_loss, tf_backprop = sess.run([loss, backprop]) self.assertAllCloseAccordingToType(np_loss, tf_loss) @@ -73,7 +73,7 @@ class SparseXentTest(test.TestCase): def testSingleClass(self): for label_dtype in np.int32, np.int64: with self.test_session(use_gpu=True) as sess: - loss, backprop = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( + loss, backprop = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( np.array([[1.], [-1.], [0.]]).astype(np.float32), np.array([0, 0, 0]).astype(label_dtype)) tf_loss, tf_backprop = sess.run([loss, backprop]) @@ -87,8 +87,9 @@ class SparseXentTest(test.TestCase): if test.is_built_with_cuda() and test.is_gpu_available(): with self.test_session(use_gpu=True) as sess: - loss, backprop = (gen_nn_ops._sparse_softmax_cross_entropy_with_logits( - features, labels)) + loss, backprop = ( + gen_nn_ops.sparse_softmax_cross_entropy_with_logits( + features, labels)) tf_loss, tf_backprop = sess.run([loss, backprop]) self.assertAllClose( [[np.nan] * 4, [0.25, 0.25, 0.25, -0.75], @@ -100,8 +101,8 @@ class SparseXentTest(test.TestCase): [np.nan, 1.3862, 3.4420, np.nan], tf_loss, rtol=1e-3, atol=1e-3) with self.test_session(use_gpu=False) as sess: - loss, backprop = (gen_nn_ops._sparse_softmax_cross_entropy_with_logits( - features, labels)) + loss, backprop = ( + gen_nn_ops.sparse_softmax_cross_entropy_with_logits(features, labels)) with self.assertRaisesOpError("Received a label value of"): sess.run([loss, backprop]) diff --git a/tensorflow/python/kernel_tests/stack_ops_test.py b/tensorflow/python/kernel_tests/stack_ops_test.py index aa409336f5..afd2eaffab 100644 --- a/tensorflow/python/kernel_tests/stack_ops_test.py +++ b/tensorflow/python/kernel_tests/stack_ops_test.py @@ -34,11 +34,11 @@ class StackOpTest(test.TestCase): def _testStackPushPop(self, use_gpu): with self.test_session(use_gpu=use_gpu): - h = gen_data_flow_ops._stack_v2( + h = 
gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, [[4.0, 5.0]]) + c = gen_data_flow_ops.stack_push_v2(h, [[4.0, 5.0]]) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop_v2(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop_v2(h, dtypes.float32) self.assertAllClose([[4.0, 5.0]], c1.eval()) def testStackPushPop(self): @@ -49,11 +49,11 @@ class StackOpTest(test.TestCase): with self.test_session(use_gpu=use_gpu): a = np.arange(2000) x = constant_op.constant(a, dtype=dtypes.float32) - h = gen_data_flow_ops._stack_v2( + h = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, x, swap_memory=True) + c = gen_data_flow_ops.stack_push_v2(h, x, swap_memory=True) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop_v2(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop_v2(h, dtypes.float32) self.assertAllClose(a, c1.eval()) def testStackPushPopSwap(self): @@ -63,7 +63,7 @@ class StackOpTest(test.TestCase): def _testStackWhileSwap(self, use_gpu): with self.test_session(use_gpu=use_gpu): n = constant_op.constant(0) - h = gen_data_flow_ops._stack_v2( + h = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") def c(x): @@ -72,7 +72,7 @@ class StackOpTest(test.TestCase): def b(x): with ops.control_dependencies([x]): a = constant_op.constant(np.ones(2000), dtype=dtypes.float32) - v = gen_data_flow_ops._stack_push_v2(h, a, swap_memory=True) + v = gen_data_flow_ops.stack_push_v2(h, a, swap_memory=True) with ops.control_dependencies([v]): return math_ops.add(x, 1) @@ -86,7 +86,7 @@ class StackOpTest(test.TestCase): def b1(x, y): nx = math_ops.subtract(x, 1) - ny = y + gen_data_flow_ops._stack_pop_v2(h, dtypes.float32) + ny = y + gen_data_flow_ops.stack_pop_v2(h, dtypes.float32) return [nx, ny] _, ry = control_flow_ops.while_loop( @@ -99,16 +99,16 @@ class StackOpTest(test.TestCase): def 
_testMultiStack(self, use_gpu): with self.test_session(use_gpu=use_gpu): - h1 = gen_data_flow_ops._stack_v2( + h1 = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push_v2(h1, 4.0) + c1 = gen_data_flow_ops.stack_push_v2(h1, 4.0) with ops.control_dependencies([c1]): - c1 = gen_data_flow_ops._stack_pop_v2(h1, dtypes.float32) - h2 = gen_data_flow_ops._stack_v2( + c1 = gen_data_flow_ops.stack_pop_v2(h1, dtypes.float32) + h2 = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="bar") - c2 = gen_data_flow_ops._stack_push_v2(h2, 5.0) + c2 = gen_data_flow_ops.stack_push_v2(h2, 5.0) with ops.control_dependencies([c2]): - c2 = gen_data_flow_ops._stack_pop_v2(h2, dtypes.float32) + c2 = gen_data_flow_ops.stack_pop_v2(h2, dtypes.float32) r = c1 + c2 self.assertAllClose(9.0, r.eval()) @@ -119,17 +119,17 @@ class StackOpTest(test.TestCase): def _testSameNameStacks(self, use_gpu): """Different stacks with the same name do not interfere.""" with self.test_session(use_gpu=use_gpu) as sess: - h1 = gen_data_flow_ops._stack_v2( + h1 = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - h2 = gen_data_flow_ops._stack_v2( + h2 = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push_v2(h1, 4.0) + c1 = gen_data_flow_ops.stack_push_v2(h1, 4.0) with ops.control_dependencies([c1]): - c2 = gen_data_flow_ops._stack_push_v2(h2, 5.0) + c2 = gen_data_flow_ops.stack_push_v2(h2, 5.0) with ops.control_dependencies([c2]): - pop1 = gen_data_flow_ops._stack_pop_v2(h1, dtypes.float32) - pop2 = gen_data_flow_ops._stack_pop_v2(h2, dtypes.float32) + pop1 = gen_data_flow_ops.stack_pop_v2(h1, dtypes.float32) + pop2 = gen_data_flow_ops.stack_pop_v2(h2, dtypes.float32) out1, out2 = sess.run([pop1, pop2]) self.assertAllClose(out1, 4.0) @@ -141,9 +141,9 @@ class StackOpTest(test.TestCase): def _testCloseStack(self, use_gpu): with 
self.test_session(use_gpu=use_gpu) as sess: - h = gen_data_flow_ops._stack_v2( + h = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_close_v2(h) + c1 = gen_data_flow_ops.stack_close_v2(h) sess.run(c1) def testCloseStack(self): @@ -152,11 +152,11 @@ class StackOpTest(test.TestCase): def _testPushCloseStack(self, use_gpu): with self.test_session(use_gpu=use_gpu) as sess: - h = gen_data_flow_ops._stack_v2( + h = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, [[4.0, 5.0]]) + c = gen_data_flow_ops.stack_push_v2(h, [[4.0, 5.0]]) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_close_v2(h) + c1 = gen_data_flow_ops.stack_close_v2(h) sess.run(c1) def testPushCloseStack(self): @@ -170,9 +170,9 @@ class StackOpRefTest(test.TestCase): def _testStackPushPop(self, use_gpu): with self.test_session(use_gpu=use_gpu): h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push(h, [[4.0, 5.0]]) + c = gen_data_flow_ops.stack_push(h, [[4.0, 5.0]]) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop(h, dtypes.float32) self.assertAllClose([[4.0, 5.0]], c1.eval()) def testStackPushPop(self): @@ -184,9 +184,9 @@ class StackOpRefTest(test.TestCase): a = np.arange(2000) x = constant_op.constant(a, dtype=dtypes.float32) h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push(h, x, swap_memory=True) + c = gen_data_flow_ops.stack_push(h, x, swap_memory=True) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop(h, dtypes.float32) self.assertAllClose(a, c1.eval()) def testStackPushPopSwap(self): @@ -196,13 +196,13 @@ class StackOpRefTest(test.TestCase): def _testMultiStack(self, use_gpu): with 
self.test_session(use_gpu=use_gpu): h1 = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push(h1, 4.0) + c1 = gen_data_flow_ops.stack_push(h1, 4.0) with ops.control_dependencies([c1]): - c1 = gen_data_flow_ops._stack_pop(h1, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop(h1, dtypes.float32) h2 = gen_data_flow_ops._stack(dtypes.float32, stack_name="bar") - c2 = gen_data_flow_ops._stack_push(h2, 5.0) + c2 = gen_data_flow_ops.stack_push(h2, 5.0) with ops.control_dependencies([c2]): - c2 = gen_data_flow_ops._stack_pop(h2, dtypes.float32) + c2 = gen_data_flow_ops.stack_pop(h2, dtypes.float32) r = c1 + c2 self.assertAllClose(9.0, r.eval()) @@ -217,7 +217,7 @@ class StackOpRefTest(test.TestCase): def b(x): with ops.control_dependencies([x]): a = constant_op.constant(np.ones(2000), dtype=dtypes.float32) - v = gen_data_flow_ops._stack_push(h, a, swap_memory=True) + v = gen_data_flow_ops.stack_push(h, a, swap_memory=True) with ops.control_dependencies([v]): return math_ops.add(x, 1) @@ -231,7 +231,7 @@ class StackOpRefTest(test.TestCase): def b1(x, y): nx = math_ops.subtract(x, 1) - ny = y + gen_data_flow_ops._stack_pop(h, dtypes.float32) + ny = y + gen_data_flow_ops.stack_pop(h, dtypes.float32) return [nx, ny] _, ry = control_flow_ops.while_loop( @@ -249,9 +249,9 @@ class StackOpRefTest(test.TestCase): def _testSameNameStacks(self, use_gpu): with self.test_session(use_gpu=use_gpu): h1 = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push(h1, 4.0) + c1 = gen_data_flow_ops.stack_push(h1, 4.0) h2 = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c2 = gen_data_flow_ops._stack_push(h2, 5.0) + c2 = gen_data_flow_ops.stack_push(h2, 5.0) _ = c1 + c2 self.assertNotEqual(h1.eval()[1], h2.eval()[1]) @@ -262,7 +262,7 @@ class StackOpRefTest(test.TestCase): def _testCloseStack(self, use_gpu): with self.test_session(use_gpu=use_gpu) as sess: h = gen_data_flow_ops._stack(dtypes.float32, 
stack_name="foo") - c1 = gen_data_flow_ops._stack_close(h) + c1 = gen_data_flow_ops.stack_close(h) sess.run(c1) def testCloseStack(self): @@ -272,9 +272,9 @@ class StackOpRefTest(test.TestCase): def _testPushCloseStack(self, use_gpu): with self.test_session(use_gpu=use_gpu) as sess: h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push(h, [[4.0, 5.0]]) + c = gen_data_flow_ops.stack_push(h, [[4.0, 5.0]]) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_close(h) + c1 = gen_data_flow_ops.stack_close(h) sess.run(c1) def testPushCloseStack(self): diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py index aad2443eea..8f09f3d78b 100644 --- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py +++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py @@ -437,7 +437,7 @@ class TensorArrayTest(test.TestCase): # Test reading wrong datatype, which is only possible in graph mode if context.in_graph_mode(): - r0_bad = gen_data_flow_ops._tensor_array_read_v3( + r0_bad = gen_data_flow_ops.tensor_array_read_v3( handle=w0.handle, index=0, dtype=dtypes.float64, flow_in=w0.flow) with self.assertRaisesOpError( "TensorArray dtype is float but Op requested dtype double."): diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index 6366d2e181..173d95b258 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -66,9 +66,9 @@ class UniqueTest(test.TestCase): for dtype in [np.int32, np.int64]: x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]]) with self.test_session() as sess: - y0, idx0 = gen_array_ops._unique_v2(x, axis=np.array([0], dtype)) + y0, idx0 = gen_array_ops.unique_v2(x, axis=np.array([0], dtype)) tf_y0, tf_idx0 = sess.run([y0, idx0]) - y1, idx1 = gen_array_ops._unique_v2(x, axis=np.array([1], dtype)) + y1, idx1 = 
gen_array_ops.unique_v2(x, axis=np.array([1], dtype)) tf_y1, tf_idx1 = sess.run([y1, idx1]) self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]])) self.assertAllEqual(tf_idx0, np.array([0, 0, 1])) @@ -80,7 +80,7 @@ class UniqueTest(test.TestCase): # by default, the axis will be wrapped to allow `axis=None`. x = np.random.randint(2, high=10, size=7000) with self.test_session() as sess: - y, idx = gen_array_ops._unique_v2(x, axis=np.array([], np.int32)) + y, idx = gen_array_ops.unique_v2(x, axis=np.array([], np.int32)) tf_y, tf_idx = sess.run([y, idx]) self.assertEqual(len(x), len(tf_idx)) diff --git a/tensorflow/python/kernel_tests/variable_ops_test.py b/tensorflow/python/kernel_tests/variable_ops_test.py index 79071029fd..cf369c0718 100644 --- a/tensorflow/python/kernel_tests/variable_ops_test.py +++ b/tensorflow/python/kernel_tests/variable_ops_test.py @@ -165,26 +165,26 @@ class VariableOpTest(test.TestCase): def testTemporaryVariable(self): with self.test_session(use_gpu=True): - var = gen_state_ops._temporary_variable( + var = gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="foo") var = state_ops.assign(var, [[4.0, 5.0]]) var = state_ops.assign_add(var, [[6.0, 7.0]]) - final = gen_state_ops._destroy_temporary_variable(var, var_name="foo") + final = gen_state_ops.destroy_temporary_variable(var, var_name="foo") self.assertAllClose([[10.0, 12.0]], final.eval()) def testDestroyNonexistentTemporaryVariable(self): with self.test_session(use_gpu=True): - var = gen_state_ops._temporary_variable([1, 2], dtypes.float32) - final = gen_state_ops._destroy_temporary_variable(var, var_name="bad") + var = gen_state_ops.temporary_variable([1, 2], dtypes.float32) + final = gen_state_ops.destroy_temporary_variable(var, var_name="bad") with self.assertRaises(errors.NotFoundError): final.eval() def testDuplicateTemporaryVariable(self): with self.test_session(use_gpu=True): - var1 = gen_state_ops._temporary_variable( + var1 = 
gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="dup") var1 = state_ops.assign(var1, [[1.0, 2.0]]) - var2 = gen_state_ops._temporary_variable( + var2 = gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="dup") var2 = state_ops.assign(var2, [[3.0, 4.0]]) final = var1 + var2 @@ -193,25 +193,25 @@ class VariableOpTest(test.TestCase): def testDestroyTemporaryVariableTwice(self): with self.test_session(use_gpu=True): - var = gen_state_ops._temporary_variable([1, 2], dtypes.float32) - val1 = gen_state_ops._destroy_temporary_variable(var, var_name="dup") - val2 = gen_state_ops._destroy_temporary_variable(var, var_name="dup") + var = gen_state_ops.temporary_variable([1, 2], dtypes.float32) + val1 = gen_state_ops.destroy_temporary_variable(var, var_name="dup") + val2 = gen_state_ops.destroy_temporary_variable(var, var_name="dup") final = val1 + val2 with self.assertRaises(errors.NotFoundError): final.eval() def testTemporaryVariableNoLeak(self): with self.test_session(use_gpu=True): - var = gen_state_ops._temporary_variable( + var = gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="bar") final = array_ops.identity(var) final.eval() def testTwoTemporaryVariablesNoLeaks(self): with self.test_session(use_gpu=True): - var1 = gen_state_ops._temporary_variable( + var1 = gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="var1") - var2 = gen_state_ops._temporary_variable( + var2 = gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="var2") final = var1 + var2 final.eval() diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py index b16c8c002c..27599868b7 100644 --- a/tensorflow/python/kernel_tests/variables_test.py +++ b/tensorflow/python/kernel_tests/variables_test.py @@ -687,7 +687,7 @@ class VariableContainerTest(test.TestCase): v1 = variables.Variable([1]) with ops.container("l2"): v2 = variables.Variable([2]) - special_v = 
gen_state_ops._variable( + special_v = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="VariableInL3", diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index e152f02d8e..e3e120a4eb 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -48,7 +48,7 @@ class XentTest(test.TestCase): def _testXent(self, np_features, np_labels, use_gpu=False): np_loss, np_backprop = self._npXent(np_features, np_labels) with self.test_session(use_gpu=use_gpu) as sess: - loss, backprop = gen_nn_ops._softmax_cross_entropy_with_logits( + loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits( np_features, np_labels) tf_loss, tf_backprop = sess.run([loss, backprop]) self.assertAllCloseAccordingToType(np_loss, tf_loss) @@ -71,7 +71,7 @@ class XentTest(test.TestCase): def _testSingleClass(self, use_gpu=False): for dtype in np.float16, np.float32: with self.test_session(use_gpu=use_gpu) as sess: - loss, backprop = gen_nn_ops._softmax_cross_entropy_with_logits( + loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits( np.array([[1.], [-1.], [0.]]).astype(dtype), np.array([[-1.], [0.], [1.]]).astype(dtype)) tf_loss, tf_backprop = sess.run([loss, backprop]) @@ -89,7 +89,7 @@ class XentTest(test.TestCase): np_labels = np.array([[[0., 0., 0., 1.]], [[0., .5, .5, 0.]]]).astype(dtype) self.assertRaisesRegexp(ValueError, "must be rank 2", - gen_nn_ops._softmax_cross_entropy_with_logits, + gen_nn_ops.softmax_cross_entropy_with_logits, np_features, np_labels) def testNpXent(self): @@ -131,14 +131,14 @@ class XentTest(test.TestCase): def testShapeMismatch(self): with self.test_session(): with self.assertRaises(ValueError): - gen_nn_ops._softmax_cross_entropy_with_logits( + gen_nn_ops.softmax_cross_entropy_with_logits( [[0., 1.], [2., 3.]], [[0., 1., 0.], [1., 0., 0.]]) def testNotMatrix(self): with self.test_session(): with self.assertRaises(ValueError): - 
gen_nn_ops._softmax_cross_entropy_with_logits([0., 1., 2., 3.], - [0., 1., 0., 1.]) + gen_nn_ops.softmax_cross_entropy_with_logits([0., 1., 2., 3.], + [0., 1., 0., 1.]) def testHalf(self): self._testAll( diff --git a/tensorflow/python/ops/accumulate_n_benchmark.py b/tensorflow/python/ops/accumulate_n_benchmark.py index c58d36f397..a709066cae 100644 --- a/tensorflow/python/ops/accumulate_n_benchmark.py +++ b/tensorflow/python/ops/accumulate_n_benchmark.py @@ -39,7 +39,7 @@ from tensorflow.python.platform import test class AccumulateNBenchmark(test.Benchmark): def _AccumulateNTemplate(self, inputs, init, shape, validate_shape): - var = gen_state_ops._temporary_variable( + var = gen_state_ops.temporary_variable( shape=shape, dtype=inputs[0].dtype.base_dtype) ref = state_ops.assign(var, init, validate_shape=validate_shape) update_ops = [ @@ -47,8 +47,7 @@ class AccumulateNBenchmark(test.Benchmark): ref, tensor, use_locking=True).op for tensor in inputs ] with ops.control_dependencies(update_ops): - return gen_state_ops._destroy_temporary_variable( - ref, var_name=var.op.name) + return gen_state_ops.destroy_temporary_variable(ref, var_name=var.op.name) def _AccumulateNInitializedWithFirst(self, inputs): return self._AccumulateNTemplate( @@ -60,7 +59,7 @@ class AccumulateNBenchmark(test.Benchmark): def _AccumulateNInitializedWithMerge(self, inputs): return self._AccumulateNTemplate( inputs, - init=array_ops.zeros_like(gen_control_flow_ops._merge(inputs)[0]), + init=array_ops.zeros_like(gen_control_flow_ops.merge(inputs)[0]), shape=tensor_shape.vector(0), validate_shape=False) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 9745d38dc2..925cf8ef32 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -139,7 +139,6 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index): # on CPUs and a Maxwell TitanX. 
A speedup was seen in a large majority of # cases when switching implementations at N=16, but it is possible that # there will be a small number of performance regressions. - # pylint: disable=protected-access if len(sizes) > 16: # extract the size of each input along the concat dimension sizes = array_ops.squeeze( @@ -148,10 +147,9 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index): [1, -1])) out_grads = array_ops.split(grad, sizes, non_neg_concat_dim) else: - offset = gen_array_ops._concat_offset(non_neg_concat_dim, sizes) + offset = gen_array_ops.concat_offset(non_neg_concat_dim, sizes) for (begin, size) in zip(offset, sizes): out_grads.append(array_ops.slice(grad, begin, size)) - # pylint: enable=protected-access elif isinstance(grad, ops.IndexedSlices): # Using mod here for convenience since concat_dim is already verified # in concat implementation to be within the allowed [-rank, rank) range. @@ -627,9 +625,7 @@ def _ReverseSequenceGrad(op, grad): @ops.RegisterGradient("Reverse") def _ReverseGrad(op, grad): reverse_dims = op.inputs[1] - # pylint: disable=protected-access - return gen_array_ops._reverse(grad, reverse_dims), None - # pylint: enable=protected-access + return gen_array_ops.reverse(grad, reverse_dims), None @ops.RegisterGradient("ReverseV2") @@ -700,17 +696,13 @@ ops.NotDifferentiable("OneHot") @ops.RegisterGradient("MirrorPad") def _MirrorPadGrad(op, grad): mode = op.get_attr("mode") - # pylint: disable=protected-access - return [gen_array_ops._mirror_pad_grad(grad, op.inputs[1], mode=mode), None] - # pylint: enable=protected-access + return [gen_array_ops.mirror_pad_grad(grad, op.inputs[1], mode=mode), None] @ops.RegisterGradient("MirrorPadGrad") def _MirrorPadGradGrad(op, grad): mode = op.get_attr("mode") - # pylint: disable=protected-access - return [gen_array_ops._mirror_pad(grad, op.inputs[1], mode=mode), None] - # pylint: enable=protected-access + return [gen_array_ops.mirror_pad(grad, op.inputs[1], 
mode=mode), None] @ops.RegisterGradient("QuantizeAndDequantize") diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 3db3d84475..cc559695ed 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -198,7 +198,7 @@ def expand_dims(input, axis=None, name=None, dim=None): if axis is not None: raise ValueError("can't specify both 'dim' and 'axis'") axis = dim - return gen_array_ops._expand_dims(input, axis, name) + return gen_array_ops.expand_dims(input, axis, name) # pylint: enable=redefined-builtin,protected-access @@ -211,28 +211,25 @@ def expand_dims(input, axis=None, name=None, dim=None): "This op will be removed after the deprecation date. " "Please switch to tf.setdiff1d().") def listdiff(x, y, out_idx=None, name=None): - return gen_array_ops._list_diff(x, y, out_idx, name) + return gen_array_ops.list_diff(x, y, out_idx, name) -listdiff.__doc__ = gen_array_ops._list_diff.__doc__ + "\n" + listdiff.__doc__ +listdiff.__doc__ = gen_array_ops.list_diff.__doc__ + "\n" + listdiff.__doc__ # pylint: enable=protected-access -# pylint: disable=undefined-variable,protected-access +# pylint: disable=undefined-variable @tf_export("setdiff1d") def setdiff1d(x, y, index_dtype=dtypes.int32, name=None): - return gen_array_ops._list_diff(x, y, index_dtype, name) + return gen_array_ops.list_diff(x, y, index_dtype, name) -setdiff1d.__doc__ = gen_array_ops._list_diff.__doc__ - -# pylint: enable=protected-access +setdiff1d.__doc__ = gen_array_ops.list_diff.__doc__ @tf_export("broadcast_dynamic_shape") def broadcast_dynamic_shape(shape_x, shape_y): - # pylint: disable=protected-access """Returns the broadcasted dynamic shape between `shape_x` and `shape_y`. Args: @@ -242,8 +239,7 @@ def broadcast_dynamic_shape(shape_x, shape_y): Returns: A rank 1 integer `Tensor` representing the broadcasted shape. 
""" - return gen_array_ops._broadcast_args(shape_x, shape_y) - # pylint: enable=protected-access + return gen_array_ops.broadcast_args(shape_x, shape_y) @tf_export("broadcast_static_shape") @@ -399,7 +395,7 @@ def size_internal(input, name=None, optimize=True, out_type=dtypes.int32): with ops.name_scope(name, "Size", [input]) as name: if isinstance(input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): - return gen_math_ops._prod( + return gen_math_ops.prod( gen_math_ops.cast(input.dense_shape, out_type), 0, name=name) else: input_tensor = ops.convert_to_tensor(input) @@ -892,7 +888,7 @@ def parallel_stack(values, name="parallel_stack"): output_shape = tensor_shape.TensorShape([len(values)]) output_shape = output_shape.concatenate(value_shape) # expand_dims converts concat to stack. - return gen_array_ops._parallel_concat( + return gen_array_ops.parallel_concat( [expand_dims(value, 0) for value in values], shape=output_shape) @@ -950,7 +946,7 @@ def stack(values, axis=0, name="stack"): raise ValueError("axis = %d not in [%d, %d)" % (axis, -expanded_num_dims, expanded_num_dims)) - return gen_array_ops._pack(values, axis=axis, name=name) + return gen_array_ops.pack(values, axis=axis, name=name) # pylint: disable=invalid-name @@ -994,7 +990,7 @@ def _autopacking_helper(list_or_tuple, dtype, name): # convertible-to-tensor types, such as numpy arrays. 
elems_as_tensors.append( constant_op.constant(elem, dtype=dtype, name=str(i))) - return gen_array_ops._pack(elems_as_tensors, name=scope) + return gen_array_ops.pack(elems_as_tensors, name=scope) else: return converted_elems @@ -1089,7 +1085,7 @@ def unstack(value, num=None, axis=0, name="unstack"): num = value_shape[axis].value if num is None: raise ValueError("Cannot infer num from shape %s" % value_shape) - return gen_array_ops._unpack(value, num=num, axis=axis, name=name) + return gen_array_ops.unpack(value, num=num, axis=axis, name=name) @tf_export("concat") @@ -1186,7 +1182,7 @@ def concat(values, axis, name="concat"): dtype=dtypes.int32).get_shape().assert_is_compatible_with( tensor_shape.scalar()) return identity(values[0], name=scope) - return gen_array_ops._concat_v2(values=values, axis=axis, name=name) + return gen_array_ops.concat_v2(values=values, axis=axis, name=name) @tf_export("boolean_mask") @@ -1254,8 +1250,7 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None): axis = 0 if axis is None else axis shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask) - leading_size = gen_math_ops._prod( - shape(tensor)[axis:axis + ndims_mask], [0]) + leading_size = gen_math_ops.prod(shape(tensor)[axis:axis + ndims_mask], [0]) tensor = reshape(tensor, concat([ shape(tensor)[:axis], [leading_size], @@ -1319,10 +1314,10 @@ def unique(x, out_idx=dtypes.int32, name=None): # period (3 weeks) pass. # TODO(yongtang): The documentation should also # be updated when switch to v2. 
- return gen_array_ops._unique(x, out_idx, name) + return gen_array_ops.unique(x, out_idx, name) -unique.__doc__ = gen_array_ops._unique.__doc__ +unique.__doc__ = gen_array_ops.unique.__doc__ @tf_export("split") @@ -1376,7 +1371,7 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"): """ size_splits = ops.convert_to_tensor(num_or_size_splits) if size_splits._rank() == 0 and size_splits.dtype.is_integer: - return gen_array_ops._split( + return gen_array_ops.split( axis=axis, num_split=num_or_size_splits, value=value, name=name) if num is None: @@ -1386,12 +1381,8 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"): if num is None: raise ValueError("Cannot infer num from shape %s" % num_or_size_splits) - return gen_array_ops._split_v( - value=value, - size_splits=size_splits, - axis=axis, - num_split=num, - name=name) + return gen_array_ops.split_v( + value=value, size_splits=size_splits, axis=axis, num_split=num, name=name) @tf_export("transpose") @@ -1461,7 +1452,7 @@ def transpose(a, perm=None, name="transpose", conjugate=False): """ with ops.name_scope(name, "transpose", [a]) as name: transpose_fn = ( - gen_array_ops._conjugate_transpose + gen_array_ops.conjugate_transpose if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose) if perm is None: rank = gen_array_ops.rank(a) @@ -1639,7 +1630,7 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): return zeros( shape_internal(tensor, optimize=optimize), dtype=dtype, name=name) with ops.device(tensor.device): - return gen_array_ops._zeros_like(tensor, name=name) + return gen_array_ops.zeros_like(tensor, name=name) # For now, variant types must be created via zeros_like; as we need to # pass the input variant object to the proper zeros callback. 
@@ -1654,7 +1645,7 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): return zeros( shape_internal(tensor, optimize=optimize), dtype=dtype, name=name) else: - return gen_array_ops._zeros_like(tensor, name=name) + return gen_array_ops.zeros_like(tensor, name=name) @tf_export("ones_like") @@ -1775,7 +1766,7 @@ def placeholder(dtype, shape=None, name=None): raise RuntimeError("tf.placeholder() is not compatible with " "eager execution.") - return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name) + return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name) # pylint: disable=redefined-outer-name @@ -1919,15 +1910,15 @@ def pad(tensor, paddings, mode="CONSTANT", name=None, constant_values=0): # pyl # TODO(rjryan): Once the forward compatibility period (3 weeks) have passed # remove the "Pad" fallback here. if constant_values != 0: - result = gen_array_ops._pad_v2( + result = gen_array_ops.pad_v2( tensor, paddings, constant_values, name=name) else: - result = gen_array_ops._pad(tensor, paddings, name=name) + result = gen_array_ops.pad(tensor, paddings, name=name) elif mode == "REFLECT": - result = gen_array_ops._mirror_pad( + result = gen_array_ops.mirror_pad( tensor, paddings, mode="REFLECT", name=name) elif mode == "SYMMETRIC": - result = gen_array_ops._mirror_pad( + result = gen_array_ops.mirror_pad( tensor, paddings, mode="SYMMETRIC", name=name) else: raise ValueError("Unknown padding mode: %s" % mode) @@ -2157,7 +2148,7 @@ def edit_distance(hypothesis, truth, normalize=True, name="edit_distance"): sparse_tensor.SparseTensorValue)): raise TypeError("Truth must be a SparseTensor.") - return gen_array_ops._edit_distance( + return gen_array_ops.edit_distance( hypothesis.indices, hypothesis.values, hypothesis.dense_shape, @@ -2294,7 +2285,7 @@ def space_to_batch(input, paddings, block_size, name=None): # pylint: disable=r return result -space_to_batch.__doc__ = gen_array_ops._space_to_batch.__doc__ +space_to_batch.__doc__ = 
gen_array_ops.space_to_batch.__doc__ @tf_export("space_to_depth") @@ -2324,7 +2315,7 @@ def batch_to_space(input, crops, block_size, name=None): # pylint: disable=rede return result -batch_to_space.__doc__ = gen_array_ops._batch_to_space.__doc__ +batch_to_space.__doc__ = gen_array_ops.batch_to_space.__doc__ @tf_export("one_hot") @@ -2468,8 +2459,8 @@ def one_hot(indices, raise TypeError("dtype {0} of on_value does not match " "dtype {1} of off_value".format(on_dtype, off_dtype)) - return gen_array_ops._one_hot(indices, depth, on_value, off_value, axis, - name) + return gen_array_ops.one_hot(indices, depth, on_value, off_value, axis, + name) def _all_dimensions(x): @@ -2597,7 +2588,7 @@ def squeeze(input, axis=None, name=None, squeeze_dims=None): axis = squeeze_dims if np.isscalar(axis): axis = [axis] - return gen_array_ops._squeeze(input, axis, name) + return gen_array_ops.squeeze(input, axis, name) @tf_export("where") @@ -2648,7 +2639,7 @@ def where(condition, x=None, y=None, name=None): condition, preferred_dtype=dtypes.bool, name="condition") return gen_array_ops.where(condition=condition, name=name) elif x is not None and y is not None: - return gen_math_ops._select(condition=condition, x=x, y=y, name=name) + return gen_math_ops.select(condition=condition, x=x, y=y, name=name) else: raise ValueError("x and y must both be non-None or both be None.") diff --git a/tensorflow/python/ops/batch_norm_benchmark.py b/tensorflow/python/ops/batch_norm_benchmark.py index c2ee2b3832..4f65e3771c 100644 --- a/tensorflow/python/ops/batch_norm_benchmark.py +++ b/tensorflow/python/ops/batch_norm_benchmark.py @@ -41,9 +41,8 @@ def batch_norm_op(tensor, mean, variance, beta, gamma, scale): # _batch_norm_with_global_normalization is deprecated in v9 ops.get_default_graph().graph_def_versions.producer = 8 # pylint: disable=protected-access - return gen_nn_ops._batch_norm_with_global_normalization(tensor, mean, - variance, beta, gamma, - 0.001, scale) + return 
gen_nn_ops.batch_norm_with_global_normalization( + tensor, mean, variance, beta, gamma, 0.001, scale) # pylint: enable=protected-access diff --git a/tensorflow/python/ops/candidate_sampling_ops.py b/tensorflow/python/ops/candidate_sampling_ops.py index 220ef1754d..9ea1ea9c92 100644 --- a/tensorflow/python/ops/candidate_sampling_ops.py +++ b/tensorflow/python/ops/candidate_sampling_ops.py @@ -77,7 +77,7 @@ def uniform_candidate_sampler(true_classes, num_true, num_sampled, unique, of each of `sampled_candidates`. """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._uniform_candidate_sampler( + return gen_candidate_sampling_ops.uniform_candidate_sampler( true_classes, num_true, num_sampled, unique, range_max, seed=seed1, seed2=seed2, name=name) @@ -136,7 +136,7 @@ def log_uniform_candidate_sampler(true_classes, num_true, num_sampled, unique, of each of `sampled_candidates`. """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._log_uniform_candidate_sampler( + return gen_candidate_sampling_ops.log_uniform_candidate_sampler( true_classes, num_true, num_sampled, unique, range_max, seed=seed1, seed2=seed2, name=name) @@ -193,7 +193,7 @@ def learned_unigram_candidate_sampler(true_classes, num_true, num_sampled, """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._learned_unigram_candidate_sampler( + return gen_candidate_sampling_ops.learned_unigram_candidate_sampler( true_classes, num_true, num_sampled, unique, range_max, seed=seed1, seed2=seed2, name=name) @@ -283,7 +283,7 @@ def fixed_unigram_candidate_sampler(true_classes, """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._fixed_unigram_candidate_sampler( + return gen_candidate_sampling_ops.fixed_unigram_candidate_sampler( true_classes, num_true, num_sampled, unique, range_max, vocab_file=vocab_file, distortion=distortion, num_reserved_ids=num_reserved_ids, num_shards=num_shards, shard=shard, @@ 
-321,7 +321,7 @@ def all_candidate_sampler(true_classes, num_true, num_sampled, unique, of each of `sampled_candidates`. All returned values are 1.0. """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._all_candidate_sampler( + return gen_candidate_sampling_ops.all_candidate_sampler( true_classes, num_true, num_sampled, unique, seed=seed1, seed2=seed2, name=name) @@ -370,6 +370,6 @@ def compute_accidental_hits(true_classes, sampled_candidates, num_true, """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._compute_accidental_hits( + return gen_candidate_sampling_ops.compute_accidental_hits( true_classes, sampled_candidates, num_true, seed=seed1, seed2=seed2, name=name) diff --git a/tensorflow/python/ops/control_flow_grad.py b/tensorflow/python/ops/control_flow_grad.py index 97b57177b2..21354b5ae8 100644 --- a/tensorflow/python/ops/control_flow_grad.py +++ b/tensorflow/python/ops/control_flow_grad.py @@ -28,7 +28,6 @@ from tensorflow.python.ops import math_ops # go/tf-wildcard-import # pylint: disable=wildcard-import,undefined-variable from tensorflow.python.ops.control_flow_ops import * -from tensorflow.python.ops.gen_control_flow_ops import * # pylint: enable=wildcard-import diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 215c6940df..689f7cdc8f 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -329,7 +329,7 @@ def exit(data, name=None): # pylint: disable=redefined-builtin data = ops.internal_convert_to_tensor_or_indexed_slices(data, as_ref=True) if isinstance(data, ops.Tensor): if data.dtype._is_ref_dtype: # pylint: disable=protected-access - return gen_control_flow_ops._ref_exit(data, name) + return gen_control_flow_ops.ref_exit(data, name) else: return gen_control_flow_ops._exit(data, name) else: @@ -371,17 +371,17 @@ def switch(data, pred, dtype=None, name=None): data, dtype=dtype, 
name="data", as_ref=True) pred = ops.convert_to_tensor(pred, name="pred") if isinstance(data, ops.Tensor): - return gen_control_flow_ops._switch(data, pred, name=name) + return gen_control_flow_ops.switch(data, pred, name=name) else: if not isinstance(data, (ops.IndexedSlices, sparse_tensor.SparseTensor)): raise TypeError("Type %s not supported" % type(data)) val, ind = data.values, data.indices - val_f, val_t = gen_control_flow_ops._switch(val, pred, name=name) - ind_f, ind_t = gen_control_flow_ops._switch(ind, pred, name="indices") + val_f, val_t = gen_control_flow_ops.switch(val, pred, name=name) + ind_f, ind_t = gen_control_flow_ops.switch(ind, pred, name="indices") if isinstance(data, ops.IndexedSlices): dense_shape = data.dense_shape if dense_shape is not None: - dense_shape_f, dense_shape_t = gen_control_flow_ops._switch( + dense_shape_f, dense_shape_t = gen_control_flow_ops.switch( dense_shape, pred, name="dense_shape") else: dense_shape_f, dense_shape_t = None, None @@ -389,7 +389,7 @@ def switch(data, pred, dtype=None, name=None): ops.IndexedSlices(val_t, ind_t, dense_shape_t)) else: dense_shape = data.dense_shape - dense_shape_f, dense_shape_t = gen_control_flow_ops._switch( + dense_shape_f, dense_shape_t = gen_control_flow_ops.switch( data.dense_shape, pred, name="dense_shape") return (sparse_tensor.SparseTensor(ind_f, val_f, dense_shape_f), sparse_tensor.SparseTensor(ind_t, val_t, dense_shape_t)) @@ -473,15 +473,15 @@ def merge(inputs, name=None): ] if all([isinstance(v, ops.Tensor) for v in inputs]): if all([v.dtype._is_ref_dtype for v in inputs]): # pylint: disable=protected-access - return gen_control_flow_ops._ref_merge(inputs, name) + return gen_control_flow_ops.ref_merge(inputs, name) else: - return gen_control_flow_ops._merge(inputs, name) + return gen_control_flow_ops.merge(inputs, name) elif all([isinstance(v, sparse_tensor.SparseTensor) for v in inputs]): # Only handle the case when all inputs are SparseTensor. 
values, _ = merge([inp.values for inp in inputs], name=name) - indices, chosen_index = gen_control_flow_ops._merge( + indices, chosen_index = gen_control_flow_ops.merge( [inp.indices for inp in inputs], name="indices") - dense_shape, _ = gen_control_flow_ops._merge( + dense_shape, _ = gen_control_flow_ops.merge( [inp.dense_shape for inp in inputs], name="dense_shape") return (sparse_tensor.SparseTensor(indices, values, dense_shape), chosen_index) @@ -489,13 +489,13 @@ def merge(inputs, name=None): # For now convert all the inputs as IndexedSlices. inputs = math_ops._as_indexed_slices_list(inputs, optimize=False) values, _ = merge([inp.values for inp in inputs], name=name) - indices, chosen_index = gen_control_flow_ops._merge( + indices, chosen_index = gen_control_flow_ops.merge( [inp.indices for inp in inputs], name="indices") if any(inp.dense_shape is not None for inp in inputs): if any(inp.dense_shape is None for inp in inputs): raise ValueError("Either all merged IndexedSlices must have a " "dense_shape, or none must have a dense_shape.") - dense_shape, _ = gen_control_flow_ops._merge( + dense_shape, _ = gen_control_flow_ops.merge( [inp.dense_shape for inp in inputs], name="dense_shape") else: dense_shape = None @@ -1015,10 +1015,8 @@ class GradLoopState(object): else: max_size = GetMaxSizeFromNestedMaximumIterations( value, self.forward_context) - # pylint: disable=protected-access - acc = gen_data_flow_ops._stack_v2( + acc = gen_data_flow_ops.stack_v2( max_size=max_size, elem_type=value.dtype.base_dtype, name="f_acc") - # pylint: enable=protected-access if curr_ctxt: curr_ctxt.Exit() @@ -1031,10 +1029,8 @@ class GradLoopState(object): if value_ctxt == self.forward_context: # value is not nested in the forward context. 
self.forward_context.Enter() - # pylint: disable=protected-access - push = gen_data_flow_ops._stack_push_v2( + push = gen_data_flow_ops.stack_push_v2( enter_acc, value, swap_memory=swap_enabled) - # pylint: enable=protected-access self.forward_context.Exit() # Protect stack push and order it before forward_index. self.forward_index.op._add_control_input(push.op) @@ -1046,18 +1042,14 @@ class GradLoopState(object): # The special case for creating a zero tensor for a dead # branch of a switch. See ControlFlowState.ZerosLike(). value_ctxt.outer_context.Enter() - # pylint: disable=protected-access - push = gen_data_flow_ops._stack_push_v2( + push = gen_data_flow_ops.stack_push_v2( enter_acc, value, swap_memory=swap_enabled) - # pylint: enable=protected-access value_ctxt.outer_context.Exit() push.op._set_control_flow_context(value_ctxt) else: value_ctxt.Enter() - # pylint: disable=protected-access - push = gen_data_flow_ops._stack_push_v2( + push = gen_data_flow_ops.stack_push_v2( enter_acc, value, swap_memory=swap_enabled) - # pylint: enable=protected-access value_ctxt.Exit() # Protect stack push and order it before forward_sync. 
self.forward_sync._add_control_input(push.op) @@ -1104,10 +1096,8 @@ class GradLoopState(object): pred = cond_ctxt.pred branch = (1 - cond_ctxt.branch) if dead_branch else cond_ctxt.branch history_value = _SwitchRefOrTensor(history_value, pred)[branch] - # pylint: disable=protected-access - pop = gen_data_flow_ops._stack_pop_v2(history_value, - value.dtype.base_dtype) - # pylint: enable=protected-access + pop = gen_data_flow_ops.stack_pop_v2(history_value, + value.dtype.base_dtype) pop.set_shape(value.get_shape()) self.grad_context.Exit() parallel_iterations = self.grad_context.parallel_iterations diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index 83da6739db..4b57e2de79 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -148,7 +148,7 @@ def ctc_loss(labels, inputs, sequence_length, if not time_major: inputs = array_ops.transpose(inputs, [1, 0, 2]) # (B,T,N) => (T,B,N) - loss, _ = gen_ctc_ops._ctc_loss( + loss, _ = gen_ctc_ops.ctc_loss( inputs, labels.indices, labels.values, @@ -224,7 +224,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True): sequence found, the negative of the sum of the greatest logit at each timeframe. 
""" - outputs = gen_ctc_ops._ctc_greedy_decoder( + outputs = gen_ctc_ops.ctc_greedy_decoder( inputs, sequence_length, merge_repeated=merge_repeated) (decoded_ix, decoded_val, decoded_shape, log_probabilities) = outputs return ([sparse_tensor.SparseTensor(decoded_ix, decoded_val, decoded_shape)], @@ -272,7 +272,7 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100, """ decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = ( - gen_ctc_ops._ctc_beam_search_decoder( + gen_ctc_ops.ctc_beam_search_decoder( inputs, sequence_length, beam_width=beam_width, top_paths=top_paths, merge_repeated=merge_repeated)) diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index 03ed537cfc..052caffd49 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -342,10 +342,10 @@ class QueueBase(object): val.get_shape().assert_is_compatible_with(shape) if self._queue_ref.dtype == _dtypes.resource: - return gen_data_flow_ops._queue_enqueue_v2( + return gen_data_flow_ops.queue_enqueue_v2( self._queue_ref, vals, name=scope) else: - return gen_data_flow_ops._queue_enqueue( + return gen_data_flow_ops.queue_enqueue( self._queue_ref, vals, name=scope) def enqueue_many(self, vals, name=None): @@ -387,7 +387,7 @@ class QueueBase(object): val.get_shape().with_rank_at_least(1)[0]) val.get_shape()[1:].assert_is_compatible_with(shape) - return gen_data_flow_ops._queue_enqueue_many_v2( + return gen_data_flow_ops.queue_enqueue_many_v2( self._queue_ref, vals, name=scope) def _dequeue_return_value(self, tensors): @@ -436,10 +436,10 @@ class QueueBase(object): if name is None: name = "%s_Dequeue" % self._name if self._queue_ref.dtype == _dtypes.resource: - ret = gen_data_flow_ops._queue_dequeue_v2( + ret = gen_data_flow_ops.queue_dequeue_v2( self._queue_ref, self._dtypes, name=name) else: - ret = gen_data_flow_ops._queue_dequeue( + ret = gen_data_flow_ops.queue_dequeue( self._queue_ref, 
self._dtypes, name=name) # NOTE(mrry): Not using a shape function because we need access to @@ -479,7 +479,7 @@ class QueueBase(object): if name is None: name = "%s_DequeueMany" % self._name - ret = gen_data_flow_ops._queue_dequeue_many_v2( + ret = gen_data_flow_ops.queue_dequeue_many_v2( self._queue_ref, n=n, component_types=self._dtypes, name=name) # NOTE(mrry): Not using a shape function because we need access to @@ -523,7 +523,7 @@ class QueueBase(object): if name is None: name = "%s_DequeueUpTo" % self._name - ret = gen_data_flow_ops._queue_dequeue_up_to_v2( + ret = gen_data_flow_ops.queue_dequeue_up_to_v2( self._queue_ref, n=n, component_types=self._dtypes, name=name) # NOTE(mrry): Not using a shape function because we need access to @@ -560,12 +560,12 @@ class QueueBase(object): if name is None: name = "%s_Close" % self._name if self._queue_ref.dtype == _dtypes.resource: - return gen_data_flow_ops._queue_close_v2( + return gen_data_flow_ops.queue_close_v2( self._queue_ref, cancel_pending_enqueues=cancel_pending_enqueues, name=name) else: - return gen_data_flow_ops._queue_close( + return gen_data_flow_ops.queue_close( self._queue_ref, cancel_pending_enqueues=cancel_pending_enqueues, name=name) @@ -601,9 +601,9 @@ class QueueBase(object): if name is None: name = "%s_Size" % self._name if self._queue_ref.dtype == _dtypes.resource: - return gen_data_flow_ops._queue_size_v2(self._queue_ref, name=name) + return gen_data_flow_ops.queue_size_v2(self._queue_ref, name=name) else: - return gen_data_flow_ops._queue_size(self._queue_ref, name=name) + return gen_data_flow_ops.queue_size(self._queue_ref, name=name) @tf_export("RandomShuffleQueue") @@ -683,7 +683,7 @@ class RandomShuffleQueue(QueueBase): # the id of the last op created.) 
string = (str(seed1) + shared_name).encode("utf-8") seed2 = int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF - queue_ref = gen_data_flow_ops._random_shuffle_queue_v2( + queue_ref = gen_data_flow_ops.random_shuffle_queue_v2( component_types=dtypes, shapes=shapes, capacity=capacity, @@ -748,7 +748,7 @@ class FIFOQueue(QueueBase): dtypes = _as_type_list(dtypes) shapes = _as_shape_list(shapes, dtypes) names = _as_name_list(names, dtypes) - queue_ref = gen_data_flow_ops._fifo_queue_v2( + queue_ref = gen_data_flow_ops.fifo_queue_v2( component_types=dtypes, shapes=shapes, capacity=capacity, @@ -827,7 +827,7 @@ class PaddingFIFOQueue(QueueBase): "but received %d dtypes and %d shapes." % (len(dtypes), len(shapes))) - queue_ref = gen_data_flow_ops._padding_fifo_queue_v2( + queue_ref = gen_data_flow_ops.padding_fifo_queue_v2( component_types=dtypes, shapes=shapes, capacity=capacity, @@ -895,7 +895,7 @@ class PriorityQueue(QueueBase): types = _as_type_list(types) shapes = _as_shape_list(shapes, types) - queue_ref = gen_data_flow_ops._priority_queue_v2( + queue_ref = gen_data_flow_ops.priority_queue_v2( component_types=types, shapes=shapes, capacity=capacity, @@ -985,7 +985,7 @@ class Barrier(object): else: self._shapes = [tensor_shape.unknown_shape() for _ in self._types] - self._barrier_ref = gen_data_flow_ops._barrier( + self._barrier_ref = gen_data_flow_ops.barrier( component_types=self._types, shapes=self._shapes, shared_name=shared_name, @@ -1026,7 +1026,7 @@ class Barrier(object): """ if name is None: name = "%s_BarrierInsertMany" % self._name - return gen_data_flow_ops._barrier_insert_many( + return gen_data_flow_ops.barrier_insert_many( self._barrier_ref, keys, values, component_index, name=name) def take_many(self, @@ -1073,7 +1073,7 @@ class Barrier(object): """ if name is None: name = "%s_BarrierTakeMany" % self._name - ret = gen_data_flow_ops._barrier_take_many( + ret = gen_data_flow_ops.barrier_take_many( self._barrier_ref, num_elements, self._types, @@ 
-1122,7 +1122,7 @@ class Barrier(object): """ if name is None: name = "%s_BarrierClose" % self._name - return gen_data_flow_ops._barrier_close( + return gen_data_flow_ops.barrier_close( self._barrier_ref, cancel_pending_enqueues=cancel_pending_enqueues, name=name) @@ -1139,7 +1139,7 @@ class Barrier(object): """ if name is None: name = "%s_BarrierReadySize" % self._name - return gen_data_flow_ops._barrier_ready_size(self._barrier_ref, name=name) + return gen_data_flow_ops.barrier_ready_size(self._barrier_ref, name=name) def incomplete_size(self, name=None): """Compute the number of incomplete elements in the given barrier. @@ -1153,7 +1153,7 @@ class Barrier(object): """ if name is None: name = "%s_BarrierIncompleteSize" % self._name - return gen_data_flow_ops._barrier_incomplete_size( + return gen_data_flow_ops.barrier_incomplete_size( self._barrier_ref, name=name) diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index ac03d30fcd..09a0e345f2 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -41,7 +41,7 @@ from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops.gen_functional_ops import * # pylint: enable=wildcard-import # pylint: disable=unused-import -from tensorflow.python.ops.gen_functional_ops import _symbolic_gradient +from tensorflow.python.ops.gen_functional_ops import symbolic_gradient # pylint: enable=unused-import from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 227316a01e..be61014395 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -356,7 +356,7 @@ def _SymGrad(op, out_grads): for k in op.node_def.attr: f.attr[k].CopyFrom(op.node_def.attr[k]) # pylint: disable=protected-access - in_grads = functional_ops._symbolic_gradient(input=f_in, 
Tout=f_types, f=f) + in_grads = functional_ops.symbolic_gradient(input=f_in, Tout=f_types, f=f) # pylint: enable=protected-access return in_grads diff --git a/tensorflow/python/ops/histogram_ops.py b/tensorflow/python/ops/histogram_ops.py index 6a975160b0..4a1ef54fb5 100644 --- a/tensorflow/python/ops/histogram_ops.py +++ b/tensorflow/python/ops/histogram_ops.py @@ -141,5 +141,7 @@ def histogram_fixed_width(values, """ with ops.name_scope(name, 'histogram_fixed_width', [values, value_range, nbins]) as name: - return gen_math_ops._histogram_fixed_width( # pylint: disable=protected-access + # pylint: disable=protected-access + return gen_math_ops._histogram_fixed_width( values, value_range, nbins, dtype=dtype, name=name) + # pylint: enable=protected-access diff --git a/tensorflow/python/ops/image_grad.py b/tensorflow/python/ops/image_grad.py index 093843cd5b..9f43e3f146 100644 --- a/tensorflow/python/ops/image_grad.py +++ b/tensorflow/python/ops/image_grad.py @@ -41,12 +41,10 @@ def _ResizeNearestNeighborGrad(op, grad): else: image_shape = array_ops.shape(image)[1:3] - # pylint: disable=protected-access - grads = gen_image_ops._resize_nearest_neighbor_grad( + grads = gen_image_ops.resize_nearest_neighbor_grad( grad, image_shape, align_corners=op.get_attr("align_corners")) - # pylint: enable=protected-access return [grads, None] @@ -61,10 +59,8 @@ def _ResizeBilinearGrad(op, grad): Returns: The gradients w.r.t. the input. 
""" - # pylint: disable=protected-access - grad0 = gen_image_ops._resize_bilinear_grad( + grad0 = gen_image_ops.resize_bilinear_grad( grad, op.inputs[0], align_corners=op.get_attr("align_corners")) - # pylint: enable=protected-access return [grad0, None] @@ -82,10 +78,8 @@ def _ResizeBicubicGrad(op, grad): allowed_types = [dtypes.float32, dtypes.float64] grad0 = None if op.inputs[0].dtype in allowed_types: - # pylint: disable=protected-access - grad0 = gen_image_ops._resize_bicubic_grad( + grad0 = gen_image_ops.resize_bicubic_grad( grad, op.inputs[0], align_corners=op.get_attr("align_corners")) - # pylint: enable=protected-access return [grad0, None] diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 53bd108c44..ca8806a095 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1115,10 +1115,8 @@ def adjust_contrast(images, contrast_factor): orig_dtype = images.dtype flt_images = convert_image_dtype(images, dtypes.float32) - # pylint: disable=protected-access - adjusted = gen_image_ops._adjust_contrastv2( + adjusted = gen_image_ops.adjust_contrastv2( flt_images, contrast_factor=contrast_factor, name=name) - # pylint: enable=protected-access return convert_image_dtype(adjusted, orig_dtype, saturate=True) @@ -1732,7 +1730,7 @@ def sample_distorted_bounding_box(image_size, Provide as input to `tf.image.draw_bounding_boxes`. 
""" with ops.name_scope(name, 'sample_distorted_bounding_box'): - return gen_image_ops._sample_distorted_bounding_box_v2( # pylint: disable=protected-access + return gen_image_ops.sample_distorted_bounding_box_v2( image_size, bounding_boxes, seed=seed, @@ -1786,10 +1784,8 @@ def non_max_suppression(boxes, """ with ops.name_scope(name, 'non_max_suppression'): iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') - # pylint: disable=protected-access - return gen_image_ops._non_max_suppression_v2(boxes, scores, max_output_size, - iou_threshold) - # pylint: enable=protected-access + return gen_image_ops.non_max_suppression_v2(boxes, scores, max_output_size, + iou_threshold) _rgb_to_yiq_kernel = [[0.299, 0.59590059, diff --git a/tensorflow/python/ops/io_ops.py b/tensorflow/python/ops/io_ops.py index 5e70b3186f..7c782c12a5 100644 --- a/tensorflow/python/ops/io_ops.py +++ b/tensorflow/python/ops/io_ops.py @@ -111,10 +111,10 @@ def _save(filename, tensor_names, tensors, tensor_slices=None, name="save"): An Operation that saves the tensors. """ if tensor_slices is None: - return gen_io_ops._save(filename, tensor_names, tensors, name=name) + return gen_io_ops.save(filename, tensor_names, tensors, name=name) else: - return gen_io_ops._save_slices(filename, tensor_names, tensor_slices, - tensors, name=name) + return gen_io_ops.save_slices(filename, tensor_names, tensor_slices, + tensors, name=name) def _restore_slice(file_pattern, tensor_name, shape_and_slice, tensor_type, @@ -136,7 +136,7 @@ def _restore_slice(file_pattern, tensor_name, shape_and_slice, tensor_type, A tensor of type "tensor_type". 
""" base_type = dtypes.as_dtype(tensor_type).base_dtype - return gen_io_ops._restore_slice( + return gen_io_ops.restore_slice( file_pattern, tensor_name, shape_and_slice, base_type, preferred_shard, name=name) @@ -208,12 +208,12 @@ class ReaderBase(object): else: queue_ref = queue.queue_ref if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_read_v2(self._reader_ref, queue_ref, name=name) + return gen_io_ops.reader_read_v2(self._reader_ref, queue_ref, name=name) else: # For compatibility with pre-resource queues, create a ref(string) tensor # which can be looked up as the same queue by a resource manager. - old_queue_op = gen_data_flow_ops._fake_queue(queue_ref) - return gen_io_ops._reader_read(self._reader_ref, old_queue_op, name=name) + old_queue_op = gen_data_flow_ops.fake_queue(queue_ref) + return gen_io_ops.reader_read(self._reader_ref, old_queue_op, name=name) def read_up_to(self, queue, num_records, # pylint: disable=invalid-name name=None): @@ -240,18 +240,18 @@ class ReaderBase(object): else: queue_ref = queue.queue_ref if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_read_up_to_v2(self._reader_ref, - queue_ref, - num_records, - name=name) + return gen_io_ops.reader_read_up_to_v2(self._reader_ref, + queue_ref, + num_records, + name=name) else: # For compatibility with pre-resource queues, create a ref(string) tensor # which can be looked up as the same queue by a resource manager. - old_queue_op = gen_data_flow_ops._fake_queue(queue_ref) - return gen_io_ops._reader_read_up_to(self._reader_ref, - old_queue_op, - num_records, - name=name) + old_queue_op = gen_data_flow_ops.fake_queue(queue_ref) + return gen_io_ops.reader_read_up_to(self._reader_ref, + old_queue_op, + num_records, + name=name) def num_records_produced(self, name=None): """Returns the number of records this reader has produced. 
@@ -267,11 +267,11 @@ class ReaderBase(object): """ if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_num_records_produced_v2(self._reader_ref, - name=name) + return gen_io_ops.reader_num_records_produced_v2(self._reader_ref, + name=name) else: - return gen_io_ops._reader_num_records_produced(self._reader_ref, - name=name) + return gen_io_ops.reader_num_records_produced(self._reader_ref, + name=name) def num_work_units_completed(self, name=None): """Returns the number of work units this reader has finished processing. @@ -283,11 +283,11 @@ class ReaderBase(object): An int64 Tensor. """ if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_num_work_units_completed_v2(self._reader_ref, - name=name) + return gen_io_ops.reader_num_work_units_completed_v2(self._reader_ref, + name=name) else: - return gen_io_ops._reader_num_work_units_completed(self._reader_ref, - name=name) + return gen_io_ops.reader_num_work_units_completed(self._reader_ref, + name=name) def serialize_state(self, name=None): """Produce a string tensor that encodes the state of a reader. @@ -302,9 +302,9 @@ class ReaderBase(object): A string Tensor. """ if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_serialize_state_v2(self._reader_ref, name=name) + return gen_io_ops.reader_serialize_state_v2(self._reader_ref, name=name) else: - return gen_io_ops._reader_serialize_state(self._reader_ref, name=name) + return gen_io_ops.reader_serialize_state(self._reader_ref, name=name) def restore_state(self, state, name=None): """Restore a reader to a previously saved state. @@ -321,11 +321,10 @@ class ReaderBase(object): The created Operation. 
""" if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_restore_state_v2( + return gen_io_ops.reader_restore_state_v2( self._reader_ref, state, name=name) else: - return gen_io_ops._reader_restore_state( - self._reader_ref, state, name=name) + return gen_io_ops.reader_restore_state(self._reader_ref, state, name=name) @property def supports_serialize(self): @@ -342,9 +341,9 @@ class ReaderBase(object): The created Operation. """ if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_reset_v2(self._reader_ref, name=name) + return gen_io_ops.reader_reset_v2(self._reader_ref, name=name) else: - return gen_io_ops._reader_reset(self._reader_ref, name=name) + return gen_io_ops.reader_reset(self._reader_ref, name=name) ops.NotDifferentiable("ReaderRead") @@ -377,7 +376,7 @@ class WholeFileReader(ReaderBase): Args: name: A name for the operation (optional). """ - rr = gen_io_ops._whole_file_reader_v2(name=name) + rr = gen_io_ops.whole_file_reader_v2(name=name) super(WholeFileReader, self).__init__(rr, supports_serialize=True) @@ -406,8 +405,8 @@ class TextLineReader(ReaderBase): to skip from the beginning of every file. name: A name for the operation (optional). """ - rr = gen_io_ops._text_line_reader_v2(skip_header_lines=skip_header_lines, - name=name) + rr = gen_io_ops.text_line_reader_v2(skip_header_lines=skip_header_lines, + name=name) super(TextLineReader, self).__init__(rr) @@ -444,7 +443,7 @@ class FixedLengthRecordReader(ReaderBase): name: A name for the operation (optional). encoding: The type of encoding for the file. Defaults to none. 
""" - rr = gen_io_ops._fixed_length_record_reader_v2( + rr = gen_io_ops.fixed_length_record_reader_v2( record_bytes=record_bytes, header_bytes=header_bytes, footer_bytes=footer_bytes, @@ -480,7 +479,7 @@ class TFRecordReader(ReaderBase): compression_type = python_io.TFRecordOptions.get_compression_type_string( options) - rr = gen_io_ops._tf_record_reader_v2( + rr = gen_io_ops.tf_record_reader_v2( name=name, compression_type=compression_type) super(TFRecordReader, self).__init__(rr) @@ -506,7 +505,7 @@ class LMDBReader(ReaderBase): name: A name for the operation (optional). options: A LMDBRecordOptions object (optional). """ - rr = gen_io_ops._lmdb_reader(name=name) + rr = gen_io_ops.lmdb_reader(name=name) super(LMDBReader, self).__init__(rr) @@ -534,7 +533,7 @@ class IdentityReader(ReaderBase): Args: name: A name for the operation (optional). """ - rr = gen_io_ops._identity_reader_v2(name=name) + rr = gen_io_ops.identity_reader_v2(name=name) super(IdentityReader, self).__init__(rr, supports_serialize=True) diff --git a/tensorflow/python/ops/linalg/linalg_impl.py b/tensorflow/python/ops/linalg/linalg_impl.py index d5bd916f80..2be2d5a3d4 100644 --- a/tensorflow/python/ops/linalg/linalg_impl.py +++ b/tensorflow/python/ops/linalg/linalg_impl.py @@ -31,18 +31,16 @@ band_part = array_ops.matrix_band_part cholesky = linalg_ops.cholesky cholesky_solve = linalg_ops.cholesky_solve det = linalg_ops.matrix_determinant -# pylint: disable=protected-access -slogdet = gen_linalg_ops._log_matrix_determinant -# pylint: disable=protected-access +slogdet = gen_linalg_ops.log_matrix_determinant diag = array_ops.matrix_diag diag_part = array_ops.matrix_diag_part eigh = linalg_ops.self_adjoint_eig eigvalsh = linalg_ops.self_adjoint_eigvals einsum = special_math_ops.einsum -expm = gen_linalg_ops._matrix_exponential +expm = gen_linalg_ops.matrix_exponential eye = linalg_ops.eye inv = linalg_ops.matrix_inverse -logm = gen_linalg_ops._matrix_logarithm +logm = gen_linalg_ops.matrix_logarithm 
lstsq = linalg_ops.matrix_solve_ls norm = linalg_ops.norm qr = linalg_ops.qr diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 9803eed6ae..37470e00d7 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -248,7 +248,7 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None): and l2_regularizer != 0 due to poor accuracy. """ - # pylint: disable=protected-access,long-lambda + # pylint: disable=long-lambda def _use_composite_impl(fast, tensor_shape): """Determines whether to use the composite or specialized CPU kernel. @@ -323,9 +323,8 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None): if _use_composite_impl(fast, tensor_shape): return _composite_impl(matrix, rhs, l2_regularizer) else: - return gen_linalg_ops._matrix_solve_ls( + return gen_linalg_ops.matrix_solve_ls( matrix, rhs, l2_regularizer, fast=fast, name=name) - # pylint: enable=protected-access @tf_export('self_adjoint_eig', 'linalg.eigh') @@ -346,8 +345,7 @@ def self_adjoint_eig(tensor, name=None): v: Eigenvectors. Shape is `[..., N, N]`. The columns of the inner most matrices contain eigenvectors of the corresponding matrices in `tensor` """ - # pylint: disable=protected-access - e, v = gen_linalg_ops._self_adjoint_eig_v2(tensor, compute_v=True, name=name) + e, v = gen_linalg_ops.self_adjoint_eig_v2(tensor, compute_v=True, name=name) return e, v @@ -369,8 +367,7 @@ def self_adjoint_eigvals(tensor, name=None): e: Eigenvalues. Shape is `[..., N]`. The vector `e[..., :]` contains the `N` eigenvalues of `tensor[..., :, :]`. 
""" - # pylint: disable=protected-access - e, _ = gen_linalg_ops._self_adjoint_eig_v2(tensor, compute_v=False, name=name) + e, _ = gen_linalg_ops.self_adjoint_eig_v2(tensor, compute_v=False, name=name) return e @@ -435,10 +432,8 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): ```` @end_compatibility """ - # pylint: disable=protected-access - s, u, v = gen_linalg_ops._svd( + s, u, v = gen_linalg_ops.svd( tensor, compute_uv=compute_uv, full_matrices=full_matrices, name=name) - # pylint: enable=protected-access if compute_uv: return math_ops.real(s), u, v else: diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py index 3757109c95..a7ea7dc6e1 100644 --- a/tensorflow/python/ops/logging_ops.py +++ b/tensorflow/python/ops/logging_ops.py @@ -170,7 +170,7 @@ def image_summary(tag, tensor, max_images=3, collections=None, name=None): buffer. """ with ops.name_scope(name, "ImageSummary", [tag, tensor]) as scope: - val = gen_logging_ops._image_summary( + val = gen_logging_ops.image_summary( tag=tag, tensor=tensor, max_images=max_images, name=scope) _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) return val @@ -226,11 +226,12 @@ def audio_summary(tag, with ops.name_scope(name, "AudioSummary", [tag, tensor]) as scope: sample_rate = ops.convert_to_tensor(sample_rate, dtype=dtypes.float32, name="sample_rate") - val = gen_logging_ops._audio_summary_v2(tag=tag, - tensor=tensor, - max_outputs=max_outputs, - sample_rate=sample_rate, - name=scope) + val = gen_logging_ops.audio_summary_v2( + tag=tag, + tensor=tensor, + max_outputs=max_outputs, + sample_rate=sample_rate, + name=scope) _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) return val @@ -263,7 +264,7 @@ def merge_summary(inputs, collections=None, name=None): buffer resulting from the merging. 
""" with ops.name_scope(name, "MergeSummary", inputs): - val = gen_logging_ops._merge_summary(inputs=inputs, name=name) + val = gen_logging_ops.merge_summary(inputs=inputs, name=name) _Collect(val, collections, []) return val diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index f539a7bb68..baf7cc19fa 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -196,9 +196,7 @@ class InitializableLookupTableBase(LookupInterface): """ with ops.name_scope(name, "%s_Size" % self._name, [self._table_ref]) as scope: - # pylint: disable=protected-access - return gen_lookup_ops._lookup_table_size_v2(self._table_ref, name=scope) - # pylint: enable=protected-access + return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=scope) def lookup(self, keys, name=None): """Looks up `keys` in a table, outputs the corresponding values. @@ -227,10 +225,8 @@ class InitializableLookupTableBase(LookupInterface): with ops.name_scope(name, "%s_Lookup" % self._name, (self._table_ref, key_tensor, self._default_value)) as scope: - # pylint: disable=protected-access - values = gen_lookup_ops._lookup_table_find_v2( + values = gen_lookup_ops.lookup_table_find_v2( self._table_ref, key_tensor, self._default_value, name=scope) - # pylint: enable=protected-access values.set_shape(key_tensor.get_shape()) if isinstance(keys, sparse_tensor.SparseTensor): @@ -274,13 +270,11 @@ class HashTable(InitializableLookupTableBase): """ with ops.name_scope(name, "hash_table", (initializer, default_value)) as scope: - # pylint: disable=protected-access - table_ref = gen_lookup_ops._hash_table_v2( + table_ref = gen_lookup_ops.hash_table_v2( shared_name=shared_name, key_dtype=initializer.key_dtype, value_dtype=initializer.value_dtype, name=scope) - # pylint: enable=protected-access super(HashTable, self).__init__(table_ref, default_value, initializer) @@ -352,10 +346,8 @@ class KeyValueTensorInitializer(TableInitializerBase): with 
ops.name_scope( self._name, values=(table.table_ref, self._keys, self._values)) as scope: - # pylint: disable=protected-access - init_op = gen_lookup_ops._initialize_table_v2( + init_op = gen_lookup_ops.initialize_table_v2( table.table_ref, self._keys, self._values, name=scope) - # pylint: enable=protected-access ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, init_op) return init_op @@ -518,8 +510,7 @@ class TextFileInitializer(TableInitializerBase): (table.table_ref,)) as scope: filename = ops.convert_to_tensor( self._filename, dtypes.string, name="asset_filepath") - # pylint: disable=protected-access - init_op = gen_lookup_ops._initialize_table_from_text_file_v2( + init_op = gen_lookup_ops.initialize_table_from_text_file_v2( table.table_ref, filename, self._key_index, @@ -527,7 +518,6 @@ class TextFileInitializer(TableInitializerBase): -1 if self._vocab_size is None else self._vocab_size, self._delimiter, name=scope) - # pylint: enable=protected-access ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, init_op) # If the filename tensor is anything other than a string constant (e.g., if # it is a placeholder) then it does not make sense to track it as an asset. 
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index bf28f74153..51e19b4ad3 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -302,16 +302,14 @@ def _NegGrad(_, grad): def _InvGrad(op, grad): """Returns -grad * (1 / x^2).""" y = op.outputs[0] # y = 1 / x - # pylint: disable=protected-access - return gen_math_ops._reciprocal_grad(y, grad) + return gen_math_ops.reciprocal_grad(y, grad) @ops.RegisterGradient("Reciprocal") def _ReciprocalGrad(op, grad): """Returns -grad * (1 / x^2).""" y = op.outputs[0] # y = 1 / x - # pylint: disable=protected-access - return gen_math_ops._reciprocal_grad(y, grad) + return gen_math_ops.reciprocal_grad(y, grad) @ops.RegisterGradient("InvGrad") @@ -321,8 +319,7 @@ def _InvGradGrad(op, grad): with ops.control_dependencies([grad]): ca = math_ops.conj(op.inputs[0]) cg = math_ops.conj(grad) - # pylint: disable=protected-access - return cg * -2.0 * b * ca, gen_math_ops._reciprocal_grad(ca, grad) + return cg * -2.0 * b * ca, gen_math_ops.reciprocal_grad(ca, grad) @ops.RegisterGradient("ReciprocalGrad") @@ -332,8 +329,7 @@ def _ReciprocalGradGrad(op, grad): with ops.control_dependencies([grad]): ca = math_ops.conj(op.inputs[0]) cg = math_ops.conj(grad) - # pylint: disable=protected-access - return cg * -2.0 * b * ca, gen_math_ops._reciprocal_grad(ca, grad) + return cg * -2.0 * b * ca, gen_math_ops.reciprocal_grad(ca, grad) @ops.RegisterGradient("Square") @@ -348,9 +344,7 @@ def _SquareGrad(op, grad): @ops.RegisterGradient("Sqrt") def _SqrtGrad(op, grad): y = op.outputs[0] # y = x^(1/2) - # pylint: disable=protected-access - return gen_math_ops._sqrt_grad(y, grad) - # pylint: enable=protected-access + return gen_math_ops.sqrt_grad(y, grad) @ops.RegisterGradient("SqrtGrad") @@ -366,9 +360,7 @@ def _SqrtGradGrad(op, grad): def _RsqrtGrad(op, grad): """Returns -0.5 * grad * conj(y)^3.""" y = op.outputs[0] # y = x^(-1/2) - # pylint: disable=protected-access - return 
gen_math_ops._rsqrt_grad(y, grad) - # pylint: enable=protected-access + return gen_math_ops.rsqrt_grad(y, grad) @ops.RegisterGradient("RsqrtGrad") @@ -380,8 +372,7 @@ def _RsqrtGradGrad(op, grad): ca = math_ops.conj(a) cg = math_ops.conj(grad) grad_a = -1.5 * cg * b * math_ops.square(ca) - # pylint: disable=protected-access - grad_b = gen_math_ops._rsqrt_grad(ca, grad) + grad_b = gen_math_ops.rsqrt_grad(ca, grad) return grad_a, grad_b @@ -446,8 +437,7 @@ def _TanhGrad(op, grad): y = op.outputs[0] # y = tanh(x) with ops.control_dependencies([grad]): y = math_ops.conj(y) - # pylint: disable=protected-access - return gen_math_ops._tanh_grad(y, grad) + return gen_math_ops.tanh_grad(y, grad) @ops.RegisterGradient("Asinh") @@ -485,8 +475,7 @@ def _TanhGradGrad(op, grad): with ops.control_dependencies([grad]): a = math_ops.conj(op.inputs[0]) b = math_ops.conj(op.inputs[1]) - # pylint: disable=protected-access - return grad * -2.0 * b * a, gen_math_ops._tanh_grad(a, grad) + return grad * -2.0 * b * a, gen_math_ops.tanh_grad(a, grad) @ops.RegisterGradient("Erf") @@ -634,8 +623,7 @@ def _SigmoidGrad(op, grad): y = op.outputs[0] # y = sigmoid(x) with ops.control_dependencies([grad]): y = math_ops.conj(y) - # pylint: disable=protected-access - return gen_math_ops._sigmoid_grad(y, grad) + return gen_math_ops.sigmoid_grad(y, grad) @ops.RegisterGradient("SigmoidGrad") @@ -644,8 +632,7 @@ def _SigmoidGradGrad(op, grad): a = math_ops.conj(op.inputs[0]) b = math_ops.conj(op.inputs[1]) gb = grad * b - # pylint: disable=protected-access - return gb - 2.0 * gb * a, gen_math_ops._sigmoid_grad(a, grad) + return gb - 2.0 * gb * a, gen_math_ops.sigmoid_grad(a, grad) @ops.RegisterGradient("Sign") @@ -792,7 +779,7 @@ def _MulGrad(op, grad): if (isinstance(grad, ops.Tensor) and _ShapesFullySpecifiedAndEqual(x, y, grad) and grad.dtype in (dtypes.int32, dtypes.float32)): - return gen_math_ops._mul(grad, y), gen_math_ops._mul(grad, x) + return gen_math_ops.mul(grad, y), gen_math_ops.mul(grad, x) 
assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. ", y.dtype) sx = array_ops.shape(x) sy = array_ops.shape(y) @@ -800,9 +787,9 @@ def _MulGrad(op, grad): x = math_ops.conj(x) y = math_ops.conj(y) return (array_ops.reshape( - math_ops.reduce_sum(gen_math_ops._mul(grad, y), rx), sx), + math_ops.reduce_sum(gen_math_ops.mul(grad, y), rx), sx), array_ops.reshape( - math_ops.reduce_sum(gen_math_ops._mul(x, grad), ry), sy)) + math_ops.reduce_sum(gen_math_ops.mul(x, grad), ry), sy)) # pylint: enable=protected-access @@ -976,20 +963,18 @@ def _MatMulGrad(op, grad): t_b = op.get_attr("transpose_b") a = math_ops.conj(op.inputs[0]) b = math_ops.conj(op.inputs[1]) - # pylint: disable=protected-access if not t_a and not t_b: - grad_a = gen_math_ops._mat_mul(grad, b, transpose_b=True) - grad_b = gen_math_ops._mat_mul(a, grad, transpose_a=True) + grad_a = gen_math_ops.mat_mul(grad, b, transpose_b=True) + grad_b = gen_math_ops.mat_mul(a, grad, transpose_a=True) elif not t_a and t_b: - grad_a = gen_math_ops._mat_mul(grad, b) - grad_b = gen_math_ops._mat_mul(grad, a, transpose_a=True) + grad_a = gen_math_ops.mat_mul(grad, b) + grad_b = gen_math_ops.mat_mul(grad, a, transpose_a=True) elif t_a and not t_b: - grad_a = gen_math_ops._mat_mul(b, grad, transpose_b=True) - grad_b = gen_math_ops._mat_mul(a, grad) + grad_a = gen_math_ops.mat_mul(b, grad, transpose_b=True) + grad_b = gen_math_ops.mat_mul(a, grad) elif t_a and t_b: - grad_a = gen_math_ops._mat_mul(b, grad, transpose_a=True, transpose_b=True) - grad_b = gen_math_ops._mat_mul(grad, a, transpose_a=True, transpose_b=True) - # pylint: enable=protected-access + grad_a = gen_math_ops.mat_mul(b, grad, transpose_a=True, transpose_b=True) + grad_b = gen_math_ops.mat_mul(grad, a, transpose_a=True, transpose_b=True) return grad_a, grad_b diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index c3899c7e12..14d6862919 100644 --- a/tensorflow/python/ops/math_ops.py +++ 
b/tensorflow/python/ops/math_ops.py @@ -89,8 +89,6 @@ See the @{$python/math_ops} guide. @@matrix_inverse @@cholesky @@cholesky_solve -@@matrix_exponential -@@matrix_logarithm @@matrix_solve @@matrix_triangular_solve @@matrix_solve_ls @@ -260,7 +258,7 @@ def abs(x, name=None): # pylint: disable=redefined-builtin with ops.name_scope(name, "Abs", [x]) as name: if isinstance(x, sparse_tensor.SparseTensor): if x.values.dtype.is_complex: - x_abs = gen_math_ops._complex_abs( + x_abs = gen_math_ops.complex_abs( x.values, Tout=x.values.dtype.real_dtype, name=name) return sparse_tensor.SparseTensor( indices=x.indices, values=x_abs, dense_shape=x.dense_shape) @@ -270,7 +268,7 @@ def abs(x, name=None): # pylint: disable=redefined-builtin else: x = ops.convert_to_tensor(x, name="x") if x.dtype.is_complex: - return gen_math_ops._complex_abs(x, Tout=x.dtype.real_dtype, name=name) + return gen_math_ops.complex_abs(x, Tout=x.dtype.real_dtype, name=name) return gen_math_ops._abs(x, name=name) @@ -279,7 +277,7 @@ def abs(x, name=None): # pylint: disable=redefined-builtin # pylint: disable=redefined-builtin def _bucketize(input, boundaries, name=None): - return gen_math_ops._bucketize(input=input, boundaries=boundaries, name=name) + return gen_math_ops.bucketize(input=input, boundaries=boundaries, name=name) # pylint: enable=redefined-builtin @@ -322,10 +320,10 @@ def divide(x, y, name=None): @tf_export("multiply") def multiply(x, y, name=None): - return gen_math_ops._mul(x, y, name) + return gen_math_ops.mul(x, y, name) -multiply.__doc__ = gen_math_ops._mul.__doc__.replace("Mul", "`tf.multiply`") +multiply.__doc__ = gen_math_ops.mul.__doc__.replace("Mul", "`tf.multiply`") # TODO(aselle): put deprecation in after another round of global code changes @@ -333,19 +331,19 @@ multiply.__doc__ = gen_math_ops._mul.__doc__.replace("Mul", "`tf.multiply`") "2016-12-30", "`tf.mul(x, y)` is deprecated, please use `tf.multiply(x, y)` or `x * y`") def _mul(x, y, name=None): - return 
gen_math_ops._mul(x, y, name) + return gen_math_ops.mul(x, y, name) _mul.__doc__ = ( - gen_math_ops._mul.__doc__ + ("" if _mul.__doc__ is None else _mul.__doc__)) + gen_math_ops.mul.__doc__ + ("" if _mul.__doc__ is None else _mul.__doc__)) @tf_export("subtract") def subtract(x, y, name=None): - return gen_math_ops._sub(x, y, name) + return gen_math_ops.sub(x, y, name) -subtract.__doc__ = gen_math_ops._sub.__doc__.replace("`Sub`", "`tf.subtract`") +subtract.__doc__ = gen_math_ops.sub.__doc__.replace("`Sub`", "`tf.subtract`") # TODO(aselle): put deprecation in after another round of global code changes @@ -353,11 +351,11 @@ subtract.__doc__ = gen_math_ops._sub.__doc__.replace("`Sub`", "`tf.subtract`") "2016-12-30", "`tf.sub(x, y)` is deprecated, please use `tf.subtract(x, y)` or `x - y`") def _sub(x, y, name=None): - return gen_math_ops._sub(x, y, name) + return gen_math_ops.sub(x, y, name) _sub.__doc__ = ( - gen_math_ops._sub.__doc__ + ("" if _sub.__doc__ is None else _sub.__doc__)) + gen_math_ops.sub.__doc__ + ("" if _sub.__doc__ is None else _sub.__doc__)) # pylint: disable=g-docstring-has-escape @@ -377,11 +375,11 @@ def negative(x, name=None): """ with ops.name_scope(name, "Neg", [x]) as name: if isinstance(x, sparse_tensor.SparseTensor): - x_neg = gen_math_ops._neg(x.values, name=name) + x_neg = gen_math_ops.neg(x.values, name=name) return sparse_tensor.SparseTensor( indices=x.indices, values=x_neg, dense_shape=x.dense_shape) else: - return gen_math_ops._neg(x, name=name) + return gen_math_ops.neg(x, name=name) # pylint: enable=g-docstring-has-escape @@ -895,7 +893,7 @@ def to_bfloat16(x, name="ToBFloat16"): return cast(x, dtypes.bfloat16, name=name) -ops.Tensor._override_operator("__neg__", gen_math_ops._neg) +ops.Tensor._override_operator("__neg__", gen_math_ops.neg) ops.Tensor._override_operator("__abs__", abs) # __invert__ corresponds to the ~ operator. Here we follow the numpy convention # ~ marks an elementwise bit-wise inverse. 
This is only implemented for boolean @@ -1024,7 +1022,7 @@ def _truediv_python3(x, y, name=None): if dtype is not None: x = cast(x, dtype) y = cast(y, dtype) - return gen_math_ops._real_div(x, y, name=name) + return gen_math_ops.real_div(x, y, name=name) def _div_python2(x, y, name=None): @@ -1047,9 +1045,9 @@ def _div_python2(x, y, name=None): raise TypeError("x and y must have the same dtype, got %r != %r" % (x_dtype, y_dtype)) if x_dtype.is_floating or x_dtype.is_complex: - return gen_math_ops._real_div(x, y, name=name) + return gen_math_ops.real_div(x, y, name=name) else: - return gen_math_ops._floor_div(x, y, name=name) + return gen_math_ops.floor_div(x, y, name=name) @tf_export("truediv") @@ -1107,7 +1105,7 @@ def div(x, y, name=None): # TODO(aselle): This should be removed -mod = gen_math_ops._floor_mod +mod = gen_math_ops.floor_mod # TODO(aselle): Deprecate this once all internal functionality uses @@ -1140,22 +1138,22 @@ def floordiv(x, y, name=None): TypeError: If the inputs are complex. """ with ops.name_scope(name, "floordiv", [x, y]) as name: - return gen_math_ops._floor_div(x, y, name=name) + return gen_math_ops.floor_div(x, y, name=name) -realdiv = gen_math_ops._real_div -truncatediv = gen_math_ops._truncate_div +realdiv = gen_math_ops.real_div +truncatediv = gen_math_ops.truncate_div # TODO(aselle): Rename this to floordiv when we can. -floor_div = gen_math_ops._floor_div -truncatemod = gen_math_ops._truncate_mod -floormod = gen_math_ops._floor_mod +floor_div = gen_math_ops.floor_div +truncatemod = gen_math_ops.truncate_mod +floormod = gen_math_ops.floor_mod def _mul_dispatch(x, y, name=None): """Dispatches cwise mul for "Dense*Dense" and "Dense*Sparse".""" is_tensor_y = isinstance(y, ops.Tensor) if is_tensor_y: - return gen_math_ops._mul(x, y, name=name) + return gen_math_ops.mul(x, y, name=name) else: assert isinstance(y, sparse_tensor.SparseTensor) # Case: Dense * Sparse. 
new_vals = gen_sparse_ops.sparse_dense_cwise_mul(y.indices, y.values, @@ -1174,12 +1172,12 @@ _OverrideBinaryOperatorHelper(gen_sparse_ops.sparse_dense_cwise_mul, "mul", sparse_tensor.SparseTensor) _OverrideBinaryOperatorHelper(gen_math_ops.add, "add") -_OverrideBinaryOperatorHelper(gen_math_ops._sub, "sub") +_OverrideBinaryOperatorHelper(gen_math_ops.sub, "sub") _OverrideBinaryOperatorHelper(_mul_dispatch, "mul") _OverrideBinaryOperatorHelper(_div_python2, "div") _OverrideBinaryOperatorHelper(_truediv_python3, "truediv") _OverrideBinaryOperatorHelper(floordiv, "floordiv") -_OverrideBinaryOperatorHelper(gen_math_ops._floor_mod, "mod") +_OverrideBinaryOperatorHelper(gen_math_ops.floor_mod, "mod") _OverrideBinaryOperatorHelper(pow, "pow") @@ -1501,7 +1499,7 @@ def reduce_mean(input_tensor, if keepdims is None: keepdims = False return _may_reduce_to_scalar(keepdims, axis, reduction_indices, - gen_math_ops._mean( + gen_math_ops.mean( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), @@ -1551,7 +1549,7 @@ def reduce_prod(input_tensor, if keepdims is None: keepdims = False return _may_reduce_to_scalar(keepdims, axis, reduction_indices, - gen_math_ops._prod( + gen_math_ops.prod( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), @@ -2020,7 +2018,7 @@ def matmul(a, if transpose_b: b = conj(b) adjoint_b = True - return gen_math_ops._batch_mat_mul( + return gen_math_ops.batch_mat_mul( a, b, adj_x=adjoint_a, adj_y=adjoint_b, name=name) # Neither matmul nor sparse_matmul support adjoint, so we conjugate @@ -2057,13 +2055,13 @@ def matmul(a, ret = cast(ret, dtypes.bfloat16) return ret else: - return gen_math_ops._mat_mul( + return gen_math_ops.mat_mul( a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name) _OverrideBinaryOperatorHelper(matmul, "matmul") -sparse_matmul = gen_math_ops._sparse_mat_mul +sparse_matmul = gen_math_ops.sparse_mat_mul @ops.RegisterStatistics("MatMul", "flops") @@ -2168,7 +2166,7 @@ def add_n(inputs, 
name=None): if name: return array_ops.identity(inputs[0], name=name) return inputs[0] - return gen_math_ops._add_n(inputs, name=name) + return gen_math_ops.add_n(inputs, name=name) @tf_export("accumulate_n") @@ -2246,7 +2244,7 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): # addressed return add_n(inputs, name=name) else: - return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) # pylint: disable=protected-access + return gen_math_ops.accumulate_nv2(inputs, name=name, shape=shape) # pylint: disable=protected-access @ops.RegisterGradient("AccumulateNV2") @@ -2276,7 +2274,7 @@ def sigmoid(x, name=None): """ with ops.name_scope(name, "Sigmoid", [x]) as name: x = ops.convert_to_tensor(x, name="x") - return gen_math_ops._sigmoid(x, name=name) + return gen_math_ops.sigmoid(x, name=name) @tf_export("log_sigmoid") @@ -2295,7 +2293,7 @@ def log_sigmoid(x, name=None): """ with ops.name_scope(name, "LogSigmoid", [x]) as name: x = ops.convert_to_tensor(x, name="x") - return gen_math_ops._neg(gen_nn_ops.softplus(-x), name=name) + return gen_math_ops.neg(gen_nn_ops.softplus(-x), name=name) @tf_export("nn.tanh", "tanh") @@ -2312,11 +2310,11 @@ def tanh(x, name=None): """ with ops.name_scope(name, "Tanh", [x]) as name: if isinstance(x, sparse_tensor.SparseTensor): - x_tanh = gen_math_ops._tanh(x.values, name=name) + x_tanh = gen_math_ops.tanh(x.values, name=name) return sparse_tensor.SparseTensor( indices=x.indices, values=x_tanh, dense_shape=x.dense_shape) else: - return gen_math_ops._tanh(x, name=name) + return gen_math_ops.tanh(x, name=name) @tf_export("bincount") @@ -2505,7 +2503,7 @@ def conj(x, name=None): with ops.name_scope(name, "Conj", [x]) as name: x = ops.convert_to_tensor(x, name="x") if x.dtype.is_complex or x.dtype == dtypes.variant: - return gen_math_ops._conj(x, name=name) + return gen_math_ops.conj(x, name=name) elif x.dtype.is_floating or x.dtype.is_integer: return x else: diff --git 
a/tensorflow/python/ops/nn_batchnorm_test.py b/tensorflow/python/ops/nn_batchnorm_test.py index eebfb17085..3ac2c8eb17 100644 --- a/tensorflow/python/ops/nn_batchnorm_test.py +++ b/tensorflow/python/ops/nn_batchnorm_test.py @@ -57,7 +57,6 @@ class BatchNormalizationTest(test.TestCase): test_util.set_producer_version(ops.get_default_graph(), 8) return gen_nn_ops._batch_norm_with_global_normalization( x, m, v, beta, gamma, epsilon, scale_after_normalization) - # pylint: enable=protected-access def _tfBatchNormV1BW(self, x, m, v, beta, gamma, epsilon, scale_after_normalization): @@ -223,7 +222,7 @@ class BatchNormalizationTest(test.TestCase): for scale_after_normalization in [True, False]: # _batch_norm_with_global_normalization_grad is deprecated in v9 test_util.set_producer_version(ops.get_default_graph(), 8) - grad = gen_nn_ops._batch_norm_with_global_normalization_grad( + grad = gen_nn_ops.batch_norm_with_global_normalization_grad( x, m, v, gamma, backprop, epsilon, scale_after_normalization) dx, dm, dv, db, dg = grad self.assertEqual(grad.dx, dx) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index dc24b821a5..5582daf2da 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -150,7 +150,7 @@ def _Conv3DBackpropFilterGrad(op, grad): @ops.RegisterGradient("AvgPool3D") def _AvgPool3DGrad(op, grad): - return gen_nn_ops._avg_pool3d_grad( + return gen_nn_ops.avg_pool3d_grad( array_ops.shape(op.inputs[0]), grad, ksize=op.get_attr("ksize"), @@ -172,7 +172,7 @@ def _AvgPool3DGradGrad(op, grad): @ops.RegisterGradient("MaxPool3D") def _MaxPool3DGrad(op, grad): - return gen_nn_ops._max_pool3d_grad( + return gen_nn_ops.max_pool3d_grad( op.inputs[0], op.outputs[0], grad, @@ -188,7 +188,7 @@ def _MaxPool3DGradGrad(op, grad): shape=array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype), array_ops.zeros( shape=array_ops.shape(op.inputs[1]), dtype=op.inputs[1].dtype), - gen_nn_ops._max_pool3d_grad_grad( + 
gen_nn_ops.max_pool3d_grad_grad( op.inputs[0], op.inputs[1], grad, @@ -204,7 +204,7 @@ def _MaxPool3DGradGradGrad(op, grad): shape=array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype), array_ops.zeros( shape=array_ops.shape(op.inputs[1]), dtype=op.inputs[1].dtype), - gen_nn_ops._max_pool3d_grad( + gen_nn_ops.max_pool3d_grad( op.inputs[0], op.inputs[1], grad, @@ -352,13 +352,13 @@ def _BiasAddGradV1(unused_bias_op, received_grad): @ops.RegisterGradient("Relu") def _ReluGrad(op, grad): - return gen_nn_ops._relu_grad(grad, op.outputs[0]) + return gen_nn_ops.relu_grad(grad, op.outputs[0]) @ops.RegisterGradient("EluGrad") def _EluGradGrad(op, grad): elu_x = op.inputs[1] - return (gen_nn_ops._elu_grad(grad, op.outputs[0]), + return (gen_nn_ops.elu_grad(grad, op.outputs[0]), array_ops.where(elu_x < 0, grad * op.inputs[0], array_ops.zeros( shape=array_ops.shape(elu_x), dtype=elu_x.dtype))) @@ -368,63 +368,63 @@ def _EluGradGrad(op, grad): def _SeluGradGrad(op, grad): x = op.inputs[1] scale_alpha = 1.7580993408473768599402175208123 - return (gen_nn_ops._elu_grad(grad, op.outputs[0]), + return (gen_nn_ops.elu_grad(grad, op.outputs[0]), array_ops.where(x < 0., - gen_nn_ops._elu_grad(grad, - op.outputs[0] + scale_alpha), + gen_nn_ops.elu_grad(grad, + op.outputs[0] + scale_alpha), array_ops.zeros( shape=array_ops.shape(x), dtype=x.dtype))) @ops.RegisterGradient("Relu6") def _Relu6Grad(op, grad): - return gen_nn_ops._relu6_grad(grad, op.outputs[0]) # pylint: disable=protected-access + return gen_nn_ops.relu6_grad(grad, op.outputs[0]) @ops.RegisterGradient("Relu6Grad") def _Relu6GradGrad(op, grad): x = op.inputs[1] - return (gen_nn_ops._relu6_grad(grad, x), + return (gen_nn_ops.relu6_grad(grad, x), array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) @ops.RegisterGradient("Elu") def _EluGrad(op, grad): - return gen_nn_ops._elu_grad(grad, op.outputs[0]) + return gen_nn_ops.elu_grad(grad, op.outputs[0]) @ops.RegisterGradient("Selu") def _SeluGrad(op, grad): - return 
gen_nn_ops._selu_grad(grad, op.outputs[0]) + return gen_nn_ops.selu_grad(grad, op.outputs[0]) @ops.RegisterGradient("Softplus") def _SoftplusGrad(op, grad): - return gen_nn_ops._softplus_grad(grad, op.inputs[0]) + return gen_nn_ops.softplus_grad(grad, op.inputs[0]) @ops.RegisterGradient("SoftplusGrad") def _SoftplusGradGrad(op, grad): # Let: # y = tf.nn.softplus(x) - # dx = gen_nn_ops._softplus_grad(dy, x) = dy / (1 + exp(-x)) + # dx = gen_nn_ops.softplus_grad(dy, x) = dy / (1 + exp(-x)) # This op computes (ddy, d2x) from op.inputs == [dy, x] and grad == ddx. dy, x = op.inputs with ops.control_dependencies([grad]): - ddy = gen_nn_ops._softplus_grad(grad, x) # pylint: disable=protected-access + ddy = gen_nn_ops.softplus_grad(grad, x) d2x = grad * dy / (math_ops.exp(-x) + 2.0 + math_ops.exp(x)) return (ddy, d2x) @ops.RegisterGradient("Softsign") def _SoftsignGrad(op, grad): - return gen_nn_ops._softsign_grad(grad, op.inputs[0]) + return gen_nn_ops.softsign_grad(grad, op.inputs[0]) @ops.RegisterGradient("ReluGrad") def _ReluGradGrad(op, grad): x = op.inputs[1] - return (gen_nn_ops._relu_grad(grad, x), + return (gen_nn_ops.relu_grad(grad, x), array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) @@ -565,14 +565,14 @@ def _LRNGrad(op, grad): alpha = op.get_attr("alpha") beta = op.get_attr("beta") return [ - gen_nn_ops._lrn_grad(grad, op.inputs[0], op.outputs[0], depth_radius, - bias, alpha, beta) + gen_nn_ops.lrn_grad(grad, op.inputs[0], op.outputs[0], depth_radius, bias, + alpha, beta) ] @ops.RegisterGradient("AvgPool") def _AvgPoolGrad(op, grad): - return gen_nn_ops._avg_pool_grad( + return gen_nn_ops.avg_pool_grad( array_ops.shape(op.inputs[0]), grad, op.get_attr("ksize"), @@ -584,7 +584,7 @@ def _AvgPoolGrad(op, grad): @ops.RegisterGradient("AvgPoolGrad") def _AvgPoolGradGrad(op, grad): return (array_ops.stop_gradient(op.inputs[0]), - gen_nn_ops._avg_pool( + gen_nn_ops.avg_pool( grad, op.get_attr("ksize"), op.get_attr("strides"), @@ -594,7 +594,7 @@ def 
_AvgPoolGradGrad(op, grad): @ops.RegisterGradient("MaxPool") def _MaxPoolGrad(op, grad): - return gen_nn_ops._max_pool_grad( + return gen_nn_ops.max_pool_grad( op.inputs[0], op.outputs[0], grad, @@ -620,7 +620,7 @@ def _MaxPoolGradV2(op, grad): @ops.RegisterGradient("MaxPoolWithArgmax") def _MaxPoolGradWithArgmax(op, grad, unused_argmax_grad): - return gen_nn_ops._max_pool_grad_with_argmax( + return gen_nn_ops.max_pool_grad_with_argmax( op.inputs[0], grad, op.outputs[1], @@ -635,7 +635,7 @@ def _MaxPoolGradGrad(op, grad): shape=array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype), array_ops.zeros( shape=array_ops.shape(op.inputs[1]), dtype=op.inputs[1].dtype), - gen_nn_ops._max_pool_grad_grad( + gen_nn_ops.max_pool_grad_grad( op.inputs[0], op.inputs[1], grad, @@ -669,7 +669,7 @@ def _MaxPoolGradGradGrad(op, grad): shape=array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype), array_ops.zeros( shape=array_ops.shape(op.inputs[1]), dtype=op.inputs[1].dtype), - gen_nn_ops._max_pool_grad( + gen_nn_ops.max_pool_grad( op.inputs[0], op.inputs[1], grad, @@ -696,8 +696,7 @@ def _FractionalMaxPoolGrad(op, grad_0, unused_grad_1, unused_grad_2): Returns: Input backprop for FractionalMaxPool op. """ - # pylint: disable=protected-access - return gen_nn_ops._fractional_max_pool_grad( + return gen_nn_ops.fractional_max_pool_grad( op.inputs[0], op.outputs[0], grad_0, op.outputs[1], op.outputs[2], op.get_attr("overlapping")) @@ -719,10 +718,9 @@ def _FractionalAvgPoolGrad(op, grad_0, unused_grad_1, unused_grad_2): Returns: Input backprop for FractionalAvgPool op. 
""" - # pylint: disable=protected-access - return gen_nn_ops._fractional_avg_pool_grad(op.inputs[0].get_shape(), grad_0, - op.outputs[1], op.outputs[2], - op.get_attr("overlapping")) + return gen_nn_ops.fractional_avg_pool_grad(op.inputs[0].get_shape(), grad_0, + op.outputs[1], op.outputs[2], + op.get_attr("overlapping")) @ops.RegisterGradient("BatchNormWithGlobalNormalization") @@ -746,7 +744,7 @@ def _BatchNormWithGlobalNormalizationGrad(op, grad): last dimension. dg: Backprop for gamma, which is (grad * ((x - m) * rsqrt(v + epsilon))) """ - dx, dm, dv, db, dg = gen_nn_ops._batch_norm_with_global_normalization_grad( + dx, dm, dv, db, dg = gen_nn_ops.batch_norm_with_global_normalization_grad( op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[4], grad, op.get_attr("variance_epsilon"), op.get_attr("scale_after_normalization")) return dx, dm, dv, db, dg diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 5fa5708114..7814a27311 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -888,12 +888,10 @@ def fused_batch_norm( # TODO(reedwm): In a few weeks, switch to using the V2 version exclusively. We # currently only use the V2 version for float16 inputs, which is not supported # by the V1 version. 
- # pylint: disable=protected-access if x.dtype == dtypes.float16 or x.dtype == dtypes.bfloat16: - fused_batch_norm_func = gen_nn_ops._fused_batch_norm_v2 + fused_batch_norm_func = gen_nn_ops.fused_batch_norm_v2 else: - fused_batch_norm_func = gen_nn_ops._fused_batch_norm - # pylint: enable=protected-access + fused_batch_norm_func = gen_nn_ops._fused_batch_norm # pylint: disable=protected-access y, batch_mean, batch_var, _, _ = fused_batch_norm_func( x, scale, diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 8fbe698914..a0d500afce 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1481,7 +1481,6 @@ def conv3d_transpose( name=name) -# pylint: disable=protected-access @tf_export("nn.bias_add") def bias_add(value, bias, data_format=None, name=None): """Adds `bias` to `value`. @@ -1506,10 +1505,9 @@ def bias_add(value, bias, data_format=None, name=None): with ops.name_scope(name, "BiasAdd", [value, bias]) as name: value = ops.convert_to_tensor(value, name="input") bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") - return gen_nn_ops._bias_add(value, bias, data_format=data_format, name=name) + return gen_nn_ops.bias_add(value, bias, data_format=data_format, name=name) -# pylint: disable=protected-access def bias_add_v1(value, bias, name=None): """Adds `bias` to `value`. 
@@ -1534,7 +1532,7 @@ def bias_add_v1(value, bias, name=None): with ops.name_scope(name, "BiasAddV1", [value, bias]) as name: value = ops.convert_to_tensor(value, name="input") bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") - return gen_nn_ops._bias_add_v1(value, bias, name=name) + return gen_nn_ops.bias_add_v1(value, bias, name=name) @tf_export("nn.crelu") @@ -1580,7 +1578,7 @@ def relu6(features, name=None): """ with ops.name_scope(name, "Relu6", [features]) as name: features = ops.convert_to_tensor(features, name="features") - return gen_nn_ops._relu6(features, name=name) + return gen_nn_ops.relu6(features, name=name) @tf_export("nn.leaky_relu") @@ -1645,7 +1643,7 @@ def _softmax(logits, compute_op, dim=-1, name=None): Args: logits: A non-empty `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. - compute_op: Either gen_nn_ops._softmax or gen_nn_ops._log_softmax + compute_op: Either gen_nn_ops.softmax or gen_nn_ops.log_softmax dim: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). @@ -1739,7 +1737,7 @@ def softmax(logits, axis=None, name=None, dim=None): axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) if axis is None: axis = -1 - return _softmax(logits, gen_nn_ops._softmax, axis, name) + return _softmax(logits, gen_nn_ops.softmax, axis, name) @tf_export("nn.log_softmax") @@ -1769,7 +1767,7 @@ def log_softmax(logits, axis=None, name=None, dim=None): axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) if axis is None: axis = -1 - return _softmax(logits, gen_nn_ops._log_softmax, axis, name) + return _softmax(logits, gen_nn_ops.log_softmax, axis, name) def _ensure_xent_args(name, sentinel, labels, logits): @@ -1871,7 +1869,7 @@ def softmax_cross_entropy_with_logits_v2( # Do the actual op computation. # The second output tensor contains the gradients. 
We use it in # _CrossEntropyGrad() in nn_grad but not here. - cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits( + cost, unused_backprop = gen_nn_ops.softmax_cross_entropy_with_logits( precise_logits, labels, name=name) # The output cost shape should be the input minus dim. @@ -2038,7 +2036,7 @@ def sparse_softmax_cross_entropy_with_logits( (labels_static_shape.ndims, logits.get_shape().ndims)) # Check if no reshapes are required. if logits.get_shape().ndims == 2: - cost, _ = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( + cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( precise_logits, labels, name=name) if logits.dtype == dtypes.float16: return math_ops.cast(cost, dtypes.float16) @@ -2051,7 +2049,7 @@ def sparse_softmax_cross_entropy_with_logits( labels = array_ops.reshape(labels, [-1]) # The second output tensor contains the gradients. We use it in # _CrossEntropyGrad() in nn_grad but not here. - cost, _ = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( + cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( precise_logits, labels, name=name) cost = array_ops.reshape(cost, labels_shape) cost.set_shape(labels_static_shape) @@ -2086,7 +2084,7 @@ def avg_pool(value, ksize, strides, padding, data_format="NHWC", name=None): """ with ops.name_scope(name, "AvgPool", [value]) as name: value = ops.convert_to_tensor(value, name="input") - return gen_nn_ops._avg_pool( + return gen_nn_ops.avg_pool( value, ksize=ksize, strides=strides, @@ -2116,12 +2114,13 @@ def max_pool(value, ksize, strides, padding, data_format="NHWC", name=None): """ with ops.name_scope(name, "MaxPool", [value]) as name: value = ops.convert_to_tensor(value, name="input") - return gen_nn_ops._max_pool(value, - ksize=ksize, - strides=strides, - padding=padding, - data_format=data_format, - name=name) + return gen_nn_ops.max_pool( + value, + ksize=ksize, + strides=strides, + padding=padding, + data_format=data_format, + name=name) 
@ops.RegisterStatistics("Conv2D", "flops") @@ -2331,7 +2330,7 @@ def top_k(input, k=1, sorted=True, name=None): # pylint: disable=redefined-buil values: The `k` largest elements along each last dimensional slice. indices: The indices of `values` within the last dimension of `input`. """ - return gen_nn_ops._top_kv2(input, k=k, sorted=sorted, name=name) + return gen_nn_ops.top_kv2(input, k=k, sorted=sorted, name=name) def nth_element(input, n, reverse=False, name=None): # pylint: disable=redefined-builtin @@ -2650,4 +2649,4 @@ def in_top_k(predictions, targets, k, name=None): A `Tensor` of type `bool`. Computed Precision at `k` as a `bool Tensor`. """ with ops.name_scope(name, "in_top_k"): - return gen_nn_ops._in_top_kv2(predictions, targets, k, name=name) + return gen_nn_ops.in_top_kv2(predictions, targets, k, name=name) diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py index b0315ceee2..075b38d743 100644 --- a/tensorflow/python/ops/parsing_ops.py +++ b/tensorflow/python/ops/parsing_ops.py @@ -700,8 +700,7 @@ def _parse_example_raw(serialized, # Finally, convert dense_shapes to TensorShapeProto dense_shapes = [shape.as_proto() for shape in dense_shapes] - # pylint: disable=protected-access - outputs = gen_parsing_ops._parse_example( + outputs = gen_parsing_ops.parse_example( serialized=serialized, names=names, dense_defaults=dense_defaults_vec, @@ -710,7 +709,6 @@ def _parse_example_raw(serialized, dense_keys=dense_keys, dense_shapes=dense_shapes, name=name) - # pylint: enable=protected-access (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs @@ -1132,8 +1130,7 @@ def _parse_single_sequence_example_raw(serialized, feature_list_dense_shapes = [tensor_shape.as_shape(shape).as_proto() for shape in feature_list_dense_shapes] - # pylint: disable=protected-access - outputs = gen_parsing_ops._parse_single_sequence_example( + outputs = gen_parsing_ops.parse_single_sequence_example( serialized=serialized, 
debug_name=debug_name, context_dense_defaults=context_dense_defaults_vec, @@ -1149,7 +1146,6 @@ def _parse_single_sequence_example_raw(serialized, feature_list_dense_missing_assumed_empty=( feature_list_dense_missing_assumed_empty), name=name) - # pylint: enable=protected-access (context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, @@ -1182,7 +1178,6 @@ def _parse_single_sequence_example_raw(serialized, @tf_export("decode_csv") def decode_csv(records, record_defaults, field_delim=",", use_quote_delim=True, name=None, na_value=""): - # pylint: disable=protected-access """Convert CSV records to tensors. Each column maps to one tensor. RFC 4180 format is expected for the CSV records. @@ -1211,11 +1206,13 @@ def decode_csv(records, record_defaults, field_delim=",", Each tensor will have the same shape as records. """ # TODO(martinwicke), remove the wrapper when new Python API generator is done. - return gen_parsing_ops._decode_csv( - records=records, record_defaults=record_defaults, - field_delim=field_delim, use_quote_delim=use_quote_delim, - na_value=na_value, name=name) - # pylint: enable=protected-access + return gen_parsing_ops.decode_csv( + records=records, + record_defaults=record_defaults, + field_delim=field_delim, + use_quote_delim=use_quote_delim, + na_value=na_value, + name=name) # TODO(b/70890287): Combine the implementation of this op and @@ -1391,7 +1388,6 @@ def _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types, # Finally, convert dense_shapes to TensorShapeProto dense_shapes = [shape.as_proto() for shape in dense_shapes] - # pylint: disable=protected-access outputs = gen_parsing_ops.parse_single_example( serialized=serialized, dense_defaults=dense_defaults_vec, @@ -1401,7 +1397,6 @@ def _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types, dense_keys=dense_keys, dense_shapes=dense_shapes, name=name) - # pylint: enable=protected-access (sparse_indices, sparse_values, sparse_shapes, 
dense_values) = outputs diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py index 2c86358d21..db8159579a 100644 --- a/tensorflow/python/ops/random_ops.py +++ b/tensorflow/python/ops/random_ops.py @@ -43,7 +43,6 @@ def _ShapeTensor(shape): return ops.convert_to_tensor(shape, dtype=dtype, name="shape") -# pylint: disable=protected-access @tf_export("random_normal") def random_normal(shape, mean=0.0, @@ -74,7 +73,7 @@ def random_normal(shape, mean_tensor = ops.convert_to_tensor(mean, dtype=dtype, name="mean") stddev_tensor = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev") seed1, seed2 = random_seed.get_seed(seed) - rnd = gen_random_ops._random_standard_normal( + rnd = gen_random_ops.random_standard_normal( shape_tensor, dtype, seed=seed1, seed2=seed2) mul = rnd * stddev_tensor value = math_ops.add(mul, mean_tensor, name=name) @@ -126,7 +125,7 @@ def parameterized_truncated_normal(shape, minvals_tensor = ops.convert_to_tensor(minvals, dtype=dtype, name="minvals") maxvals_tensor = ops.convert_to_tensor(maxvals, dtype=dtype, name="maxvals") seed1, seed2 = random_seed.get_seed(seed) - rnd = gen_random_ops._parameterized_truncated_normal( + rnd = gen_random_ops.parameterized_truncated_normal( shape_tensor, means_tensor, stddevs_tensor, @@ -171,7 +170,7 @@ def truncated_normal(shape, mean_tensor = ops.convert_to_tensor(mean, dtype=dtype, name="mean") stddev_tensor = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev") seed1, seed2 = random_seed.get_seed(seed) - rnd = gen_random_ops._truncated_normal( + rnd = gen_random_ops.truncated_normal( shape_tensor, dtype, seed=seed1, seed2=seed2) mul = rnd * stddev_tensor value = math_ops.add(mul, mean_tensor, name=name) @@ -237,11 +236,10 @@ def random_uniform(shape, maxval = ops.convert_to_tensor(maxval, dtype=dtype, name="max") seed1, seed2 = random_seed.get_seed(seed) if dtype.is_integer: - return gen_random_ops._random_uniform_int( + return gen_random_ops.random_uniform_int( 
shape, minval, maxval, seed=seed1, seed2=seed2, name=name) else: - rnd = gen_random_ops._random_uniform( - shape, dtype, seed=seed1, seed2=seed2) + rnd = gen_random_ops.random_uniform(shape, dtype, seed=seed1, seed2=seed2) return math_ops.add(rnd * (maxval - minval), minval, name=name) @@ -275,7 +273,7 @@ def random_shuffle(value, seed=None, name=None): dimension. """ seed1, seed2 = random_seed.get_seed(seed) - return gen_random_ops._random_shuffle( + return gen_random_ops.random_shuffle( value, seed=seed1, seed2=seed2, name=name) @@ -420,7 +418,7 @@ def random_gamma(shape, seed1, seed2 = random_seed.get_seed(seed) return math_ops.maximum( np.finfo(dtype.as_numpy_dtype).tiny, - gen_random_ops._random_gamma( + gen_random_ops.random_gamma( shape, alpha_broadcast, seed=seed1, seed2=seed2) / beta) ops.NotDifferentiable("RandomGamma") diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 6fe2f61016..01f0b81684 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -219,18 +219,16 @@ def _internal_py_func(func, inp, Tout, stateful=None, eager=False, name=None): graph._cleanup_py_funcs_used_in_graph.append(cleanup) # pylint: enable=protected-access - # pylint: disable=protected-access if eager: - result = gen_script_ops._eager_py_func( + result = gen_script_ops.eager_py_func( input=inp, token=token, Tout=Tout, name=name) else: if stateful: - result = gen_script_ops._py_func( + result = gen_script_ops.py_func( input=inp, token=token, Tout=Tout, name=name) else: - result = gen_script_ops._py_func_stateless( + result = gen_script_ops.py_func_stateless( input=inp, token=token, Tout=Tout, name=name) - # pylint: enable=protected-access return result if is_list_or_tuple else result[0] diff --git a/tensorflow/python/ops/session_ops.py b/tensorflow/python/ops/session_ops.py index cedd36c1de..ad38845153 100644 --- a/tensorflow/python/ops/session_ops.py +++ b/tensorflow/python/ops/session_ops.py @@ -16,7 
+16,6 @@ """Tensor Handle Operations. See the @{$python/session_ops} guide. @@get_session_handle -@@get_session_handle_v2 @@get_session_tensor @@delete_session_tensor """ @@ -182,7 +181,7 @@ def get_session_handle(data, name=None): # Colocate this operation with data. with ops.colocate_with(data): - return gen_data_flow_ops._get_session_handle(data, name=name) # pylint: disable=protected-access + return gen_data_flow_ops.get_session_handle(data, name=name) @tf_export("get_session_tensor") @@ -222,7 +221,7 @@ def get_session_tensor(handle, dtype, name=None): with ops.device(handle_device): holder = array_ops.placeholder(dtypes.string) _register_handle_feeder(holder.graph, holder, dtype) - tensor = gen_data_flow_ops._get_session_tensor(holder, dtype, name=name) + tensor = gen_data_flow_ops.get_session_tensor(holder, dtype, name=name) return (holder, tensor) @@ -246,7 +245,7 @@ def delete_session_tensor(handle, name=None): handle_device = TensorHandle._get_device_name(handle) with ops.device(handle_device): holder = array_ops.placeholder(dtypes.string) - deleter = gen_data_flow_ops._delete_session_tensor(holder, name=name) + deleter = gen_data_flow_ops.delete_session_tensor(holder, name=name) return (holder, deleter) @@ -268,7 +267,7 @@ def _get_handle_reader(graph, handle, dtype): with graph.as_default(), graph.device(handle_device): holder = array_ops.placeholder(dtypes.string) _register_handle_feeder(holder.graph, holder, dtype) - reader = gen_data_flow_ops._get_session_tensor(holder, dtype) + reader = gen_data_flow_ops.get_session_tensor(holder, dtype) result = (holder, reader) graph._handle_readers[graph_key] = result return result @@ -289,7 +288,7 @@ def _get_handle_mover(graph, feeder, handle): # Create mover if we haven't done it. 
holder, reader = _get_handle_reader(graph, handle, dtype) with graph.as_default(), graph.device(feeder.op.device): - mover = gen_data_flow_ops._get_session_handle(reader) # pylint: disable=protected-access + mover = gen_data_flow_ops.get_session_handle(reader) result = (holder, mover) graph._handle_movers[graph_key] = result return result @@ -303,7 +302,7 @@ def _get_handle_deleter(graph, deleter_key, handle): handle_device = TensorHandle._get_device_name(handle) with graph.as_default(), graph.device(handle_device): holder = array_ops.placeholder(dtypes.string) - deleter = gen_data_flow_ops._delete_session_tensor(holder) + deleter = gen_data_flow_ops.delete_session_tensor(holder) result = (holder, deleter) graph._handle_deleters[deleter_key] = result return result diff --git a/tensorflow/python/ops/sparse_grad.py b/tensorflow/python/ops/sparse_grad.py index 5295e7d21c..97353d6c74 100644 --- a/tensorflow/python/ops/sparse_grad.py +++ b/tensorflow/python/ops/sparse_grad.py @@ -88,10 +88,8 @@ def _SparseAddGrad(op, *grads): # the non-zero elements of the sum, and we will peek into `sum_indices` in the # gradient op. - # pylint: disable=protected-access - a_val_grad, b_val_grad = gen_sparse_ops._sparse_add_grad(val_grad, a_indices, - b_indices, - sum_indices) + a_val_grad, b_val_grad = gen_sparse_ops.sparse_add_grad( + val_grad, a_indices, b_indices, sum_indices) a_val_grad.set_shape(op.inputs[1].get_shape()) b_val_grad.set_shape(op.inputs[4].get_shape()) # (a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh) @@ -151,7 +149,7 @@ def _SparseTensorDenseMatMulGrad(op, grad): "complex gradients.") # gradient w.r.t. 
dense - b_grad = gen_sparse_ops._sparse_tensor_dense_mat_mul( # pylint: disable=protected-access + b_grad = gen_sparse_ops.sparse_tensor_dense_mat_mul( a_indices, a_values, a_shape, grad, adjoint_a=not adj_a) if adj_b: b_grad = array_ops.transpose(b_grad) @@ -278,8 +276,7 @@ def _SparseFillEmptyRowsGrad(op, unused_grad_output_indices, output_grad_values, """Gradients for SparseFillEmptyRows.""" reverse_index_map = op.outputs[3] - # pylint: disable=protected-access - d_values, d_default_value = gen_sparse_ops._sparse_fill_empty_rows_grad( + d_values, d_default_value = gen_sparse_ops.sparse_fill_empty_rows_grad( reverse_index_map=reverse_index_map, grad_values=output_grad_values) # d_indices, d_values, d_dense_shape, d_default_value. diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 0fbbf5a805..a01bba632f 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -234,7 +234,7 @@ def sparse_concat(axis, ] output_ind, output_val, output_shape = ( - gen_sparse_ops._sparse_concat(inds, vals, shapes, axis, name=name)) + gen_sparse_ops.sparse_concat(inds, vals, shapes, axis, name=name)) return sparse_tensor.SparseTensor(output_ind, output_val, output_shape) @@ -302,8 +302,8 @@ def sparse_add(a, b, thresh=0): thresh = ops.convert_to_tensor( thresh, dtype=a.values.dtype.real_dtype.base_dtype, name="thresh") output_ind, output_val, output_shape = ( - gen_sparse_ops._sparse_add(a.indices, a.values, a.dense_shape, - b.indices, b.values, b.dense_shape, thresh)) + gen_sparse_ops.sparse_add(a.indices, a.values, a.dense_shape, + b.indices, b.values, b.dense_shape, thresh)) # Attempt to get output_shape statically. a.get_shape().assert_is_compatible_with(b.get_shape()) @@ -317,8 +317,8 @@ def sparse_add(a, b, thresh=0): # swap to make `a` the SparseTensor. 
if isinstance(b, sparse_classes): a, b = b, a - return gen_sparse_ops._sparse_tensor_dense_add(a.indices, a.values, - a.dense_shape, b) + return gen_sparse_ops.sparse_tensor_dense_add(a.indices, a.values, + a.dense_shape, b) def _sparse_cross(inputs, name=None): @@ -402,7 +402,7 @@ def _sparse_cross_internal(inputs, num_buckets=0, hash_key=None, name=None): - """See gen_sparse_ops._sparse_cross.""" + """See gen_sparse_ops.sparse_cross.""" if not isinstance(inputs, list): raise TypeError("Inputs must be a list") if not all( @@ -432,7 +432,7 @@ def _sparse_cross_internal(inputs, dense_inputs[i] = math_ops.to_int64(dense_inputs[i]) internal_type = dtypes.int64 - indices_out, values_out, shape_out = gen_sparse_ops._sparse_cross( + indices_out, values_out, shape_out = gen_sparse_ops.sparse_cross( indices=indices, values=values, shapes=shapes, @@ -511,7 +511,7 @@ def sparse_reorder(sp_input, name=None): sp_input = _convert_to_sparse_tensor(sp_input) reordered_ind, reordered_val = ( - gen_sparse_ops._sparse_reorder( + gen_sparse_ops.sparse_reorder( sp_input.indices, sp_input.values, sp_input.dense_shape, name=name)) if sp_input.get_shape().is_fully_defined(): @@ -575,7 +575,7 @@ def sparse_reshape(sp_input, shape, name=None): shape = math_ops.cast(shape, dtype=dtypes.int64) with ops.name_scope(name, "SparseReshape", [sp_input]) as name: - reshaped_ind, reshaped_shape = gen_sparse_ops._sparse_reshape( + reshaped_ind, reshaped_shape = gen_sparse_ops.sparse_reshape( sp_input.indices, sp_input.dense_shape, shape, name=name) reshaped_shape_const = tensor_util.constant_value(shape) @@ -671,7 +671,7 @@ def sparse_split(keyword_required=KeywordRequired(), sp_input = _convert_to_sparse_tensor(sp_input) output_inds, output_vals, output_shapes = ( - gen_sparse_ops._sparse_split( + gen_sparse_ops.sparse_split( axis, sp_input.indices, sp_input.values, @@ -782,7 +782,7 @@ def sparse_to_dense(sparse_indices, Dense `Tensor` of shape `output_shape`. Has the same type as `sparse_values`. 
""" - return gen_sparse_ops._sparse_to_dense( + return gen_sparse_ops.sparse_to_dense( sparse_indices, output_shape, sparse_values, @@ -1412,7 +1412,7 @@ def sparse_fill_empty_rows(sp_input, default_value, name=None): default_value = ops.convert_to_tensor( default_value, dtype=sp_input.values.dtype) (output_indices, output_values, empty_row_indicator, - unused_reverse_index_map) = gen_sparse_ops._sparse_fill_empty_rows( + unused_reverse_index_map) = gen_sparse_ops.sparse_fill_empty_rows( indices=sp_input.indices, values=sp_input.values, dense_shape=sp_input.dense_shape, @@ -1441,7 +1441,7 @@ def serialize_sparse(sp_input, name=None, out_type=dtypes.string): """ sp_input = _convert_to_sparse_tensor(sp_input) - return gen_sparse_ops._serialize_sparse( + return gen_sparse_ops.serialize_sparse( sp_input.indices, sp_input.values, sp_input.dense_shape, @@ -1476,7 +1476,7 @@ def serialize_many_sparse(sp_input, name=None, out_type=dtypes.string): """ sp_input = _convert_to_sparse_tensor(sp_input) - return gen_sparse_ops._serialize_many_sparse( + return gen_sparse_ops.serialize_many_sparse( sp_input.indices, sp_input.values, sp_input.dense_shape, @@ -1541,7 +1541,7 @@ def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None): """ output_indices, output_values, output_shape = ( - gen_sparse_ops._deserialize_sparse(serialized_sparse, dtype, name=name)) + gen_sparse_ops.deserialize_sparse(serialized_sparse, dtype, name=name)) # Feed rank data back in, if available output_indices.set_shape([None, rank]) @@ -1610,7 +1610,7 @@ def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None): All of the serialized `SparseTensor`s must have had the same rank and type. 
""" output_indices, output_values, output_shape = ( - gen_sparse_ops._deserialize_many_sparse( + gen_sparse_ops.deserialize_many_sparse( serialized_sparse, dtype, name=name)) # Feed rank data back in, if available @@ -1828,7 +1828,7 @@ def sparse_tensor_dense_matmul(sp_a, with ops.name_scope(name, "SparseTensorDenseMatMul", [sp_a.indices, sp_a.values, b]) as name: b = ops.convert_to_tensor(b, name="b") - return gen_sparse_ops._sparse_tensor_dense_mat_mul( + return gen_sparse_ops.sparse_tensor_dense_mat_mul( a_indices=sp_a.indices, a_values=sp_a.values, a_shape=sp_a.dense_shape, diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py index b62e556967..65b788c31a 100644 --- a/tensorflow/python/ops/standard_ops.py +++ b/tensorflow/python/ops/standard_ops.py @@ -186,7 +186,6 @@ _allowed_symbols_array_ops = [ "quantize_and_dequantize", # to-doc # TODO(drpng): legacy symbols to be removed. - "list_diff", # Use tf.listdiff instead. "batch_matrix_diag", "batch_matrix_band_part", "batch_matrix_diag_part", diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index 6c0a090d16..fd4419640a 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -99,8 +99,8 @@ def variable_op(shape, dtype, name="Variable", set_shape=True, container="", """Deprecated. Used variable_op_v2 instead.""" if not set_shape: shape = tensor_shape.unknown_shape() - ret = gen_state_ops._variable(shape=shape, dtype=dtype, name=name, - container=container, shared_name=shared_name) + ret = gen_state_ops.variable(shape=shape, dtype=dtype, name=name, + container=container, shared_name=shared_name) # TODO(mrry): Move this to where it is used, so we can get rid of this op # wrapper? if set_shape: @@ -127,11 +127,12 @@ def variable_op_v2(shape, dtype, name="Variable", container="", shared_name=""): Returns: A variable tensor. 
""" - return gen_state_ops._variable_v2(shape=shape, - dtype=dtype, - name=name, - container=container, - shared_name=shared_name) + return gen_state_ops.variable_v2( + shape=shape, + dtype=dtype, + name=name, + container=container, + shared_name=shared_name) def init_variable(v, init, name="init"): diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index b8c39d91b4..0335d2456a 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -93,10 +93,8 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv delimiter = ops.convert_to_tensor(delimiter, dtype=dtypes.string) source = ops.convert_to_tensor(source, dtype=dtypes.string) - # pylint: disable=protected-access - indices, values, shape = gen_string_ops._string_split( + indices, values, shape = gen_string_ops.string_split( source, delimiter=delimiter, skip_empty=skip_empty) - # pylint: enable=protected-access indices.set_shape([None, 2]) values.set_shape([None]) shape.set_shape([2]) diff --git a/tensorflow/python/ops/summary_ops.py b/tensorflow/python/ops/summary_ops.py index 7f4f4ce5ab..037bc9845a 100644 --- a/tensorflow/python/ops/summary_ops.py +++ b/tensorflow/python/ops/summary_ops.py @@ -13,7 +13,6 @@ # limitations under the License. 
# ============================================================================== """Summary Operations.""" -# pylint: disable=protected-access from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -74,7 +73,7 @@ def tensor_summary(name, with summary_op_util.summary_scope( name, family, values=[tensor]) as (tag, scope): - val = gen_logging_ops._tensor_summary_v2( + val = gen_logging_ops.tensor_summary_v2( tensor=tensor, tag=tag, name=scope, diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py index 3c08870146..6226f426be 100644 --- a/tensorflow/python/ops/tensor_array_ops.py +++ b/tensorflow/python/ops/tensor_array_ops.py @@ -148,7 +148,7 @@ class _GraphTensorArray(object): # will retroactively set the device value of this op. def create(): """Create the TensorArray op.""" - return gen_data_flow_ops._tensor_array_v3( + return gen_data_flow_ops.tensor_array_v3( dtype=dtype, size=size, element_shape=element_shape, @@ -237,7 +237,7 @@ class _GraphTensorArray(object): flow = self.flow with ops.name_scope(name, "TensorArrayGrad", [self._handle]): with ops.colocate_with(self._handle): - g_handle, unused_flow = gen_data_flow_ops._tensor_array_grad_v3( + g_handle, unused_flow = gen_data_flow_ops.tensor_array_grad_v3( handle=self._handle, source=source, flow_in=flow, name=name) with ops.control_dependencies([g_handle]): flow = array_ops.identity(flow, name="gradient_flow") @@ -252,7 +252,7 @@ class _GraphTensorArray(object): def read(self, index, name=None): """See TensorArray.""" - value = gen_data_flow_ops._tensor_array_read_v3( + value = gen_data_flow_ops.tensor_array_read_v3( handle=self._handle, index=index, flow_in=self._flow, @@ -270,7 +270,7 @@ class _GraphTensorArray(object): if self._infer_shape: self._merge_element_shape(value.shape) with self._maybe_colocate_with(value): - flow_out = gen_data_flow_ops._tensor_array_write_v3( + flow_out = 
gen_data_flow_ops.tensor_array_write_v3( handle=self._handle, index=index, value=value, @@ -296,7 +296,7 @@ class _GraphTensorArray(object): element_shape = self._element_shape[0] else: element_shape = tensor_shape.TensorShape(None) - value = gen_data_flow_ops._tensor_array_gather_v3( + value = gen_data_flow_ops.tensor_array_gather_v3( handle=self._handle, indices=indices, flow_in=self._flow, @@ -314,7 +314,7 @@ class _GraphTensorArray(object): tensor_shape.TensorShape(self._element_shape[0].dims[1:])) else: element_shape_except0 = tensor_shape.TensorShape(None) - value, _ = gen_data_flow_ops._tensor_array_concat_v3( + value, _ = gen_data_flow_ops.tensor_array_concat_v3( handle=self._handle, flow_in=self._flow, dtype=self._dtype, @@ -341,7 +341,7 @@ class _GraphTensorArray(object): if self._infer_shape and context.in_graph_mode(): self._merge_element_shape(value.shape[1:]) with self._maybe_colocate_with(value): - flow_out = gen_data_flow_ops._tensor_array_scatter_v3( + flow_out = gen_data_flow_ops.tensor_array_scatter_v3( handle=self._handle, indices=indices, value=value, @@ -370,7 +370,7 @@ class _GraphTensorArray(object): self._merge_element_shape( tensor_shape.TensorShape([clengths[0]]).concatenate( value.shape[1:])) - flow_out = gen_data_flow_ops._tensor_array_split_v3( + flow_out = gen_data_flow_ops.tensor_array_split_v3( handle=self._handle, value=value, lengths=lengths_64, @@ -386,13 +386,13 @@ class _GraphTensorArray(object): def size(self, name=None): """See TensorArray.""" - return gen_data_flow_ops._tensor_array_size_v3( + return gen_data_flow_ops.tensor_array_size_v3( handle=self._handle, flow_in=self.flow, name=name) @tf_should_use.should_use_result def close(self, name=None): """See TensorArray.""" - return gen_data_flow_ops._tensor_array_close_v3( + return gen_data_flow_ops.tensor_array_close_v3( handle=self._handle, name=name) # pylint: enable=protected-access diff --git a/tensorflow/python/summary/summary.py b/tensorflow/python/summary/summary.py 
index b80ad79074..7ff633a654 100644 --- a/tensorflow/python/summary/summary.py +++ b/tensorflow/python/summary/summary.py @@ -152,8 +152,7 @@ def image(name, tensor, max_outputs=3, collections=None, family=None): """ with _summary_op_util.summary_scope( name, family, values=[tensor]) as (tag, scope): - # pylint: disable=protected-access - val = _gen_logging_ops._image_summary( + val = _gen_logging_ops.image_summary( tag=tag, tensor=tensor, max_images=max_outputs, name=scope) _summary_op_util.collect(val, collections, [_ops.GraphKeys.SUMMARIES]) return val @@ -237,10 +236,9 @@ def audio(name, tensor, sample_rate, max_outputs=3, collections=None, """ with _summary_op_util.summary_scope( name, family=family, values=[tensor]) as (tag, scope): - # pylint: disable=protected-access sample_rate = _ops.convert_to_tensor( sample_rate, dtype=_dtypes.float32, name='sample_rate') - val = _gen_logging_ops._audio_summary_v2( + val = _gen_logging_ops.audio_summary_v2( tag=tag, tensor=tensor, max_outputs=max_outputs, sample_rate=sample_rate, name=scope) _summary_op_util.collect(val, collections, [_ops.GraphKeys.SUMMARIES]) @@ -286,8 +284,7 @@ def merge(inputs, collections=None, name=None): 'Use tf.contrib.summary instead.') name = _summary_op_util.clean_tag(name) with _ops.name_scope(name, 'Merge', inputs): - # pylint: disable=protected-access - val = _gen_logging_ops._merge_summary(inputs=inputs, name=name) + val = _gen_logging_ops.merge_summary(inputs=inputs, name=name) _summary_op_util.collect(val, collections, []) return val diff --git a/tensorflow/python/training/checkpoint_ops.py b/tensorflow/python/training/checkpoint_ops.py index 7f92d94d2b..a6e9662b73 100644 --- a/tensorflow/python/training/checkpoint_ops.py +++ b/tensorflow/python/training/checkpoint_ops.py @@ -149,7 +149,7 @@ def _load_and_remap_matrix(ckpt_path, num_rows_present = num_rows_to_load if remap_rows: row_remapping, num_rows_present = ( - gen_checkpoint_ops._generate_vocab_remapping( # pylint: 
disable=protected-access + gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=new_row_vocab_file, old_vocab_file=old_row_vocab_file, new_vocab_offset=new_row_vocab_offset, @@ -168,7 +168,7 @@ def _load_and_remap_matrix(ckpt_path, num_cols_present = new_col_vocab_size if remap_cols: col_remapping, num_cols_present = ( - gen_checkpoint_ops._generate_vocab_remapping( # pylint: disable=protected-access + gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=new_col_vocab_file, old_vocab_file=old_col_vocab_file, new_vocab_offset=0, # Offset is unused for cols (no partitioning). @@ -178,7 +178,7 @@ def _load_and_remap_matrix(ckpt_path, num_rows_to_load * new_col_vocab_size - num_rows_present * num_cols_present, 1 ]) - return_tensor = gen_checkpoint_ops._load_and_remap_matrix( # pylint: disable=protected-access + return_tensor = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=ckpt_path, old_tensor_name=old_tensor_name, row_remapping=row_remapping, diff --git a/tensorflow/python/training/learning_rate_decay_test.py b/tensorflow/python/training/learning_rate_decay_test.py index 1ce8c156a0..23b30632f6 100644 --- a/tensorflow/python/training/learning_rate_decay_test.py +++ b/tensorflow/python/training/learning_rate_decay_test.py @@ -43,8 +43,8 @@ class LRDecayTest(test_util.TensorFlowTestCase): def testStaircase(self): with self.test_session(): - step = gen_state_ops._variable(shape=[], dtype=dtypes.int32, - name="step", container="", shared_name="") + step = gen_state_ops.variable(shape=[], dtype=dtypes.int32, + name="step", container="", shared_name="") assign_100 = state_ops.assign(step, 100) assign_1 = state_ops.assign(step, 1) assign_2 = state_ops.assign(step, 2) @@ -264,8 +264,8 @@ class ExponentialDecayTest(test_util.TensorFlowTestCase): initial_lr = 0.1 k = 10 decay_rate = 0.96 - step = gen_state_ops._variable(shape=[], dtype=dtypes.int32, - name="step", container="", shared_name="") + step = gen_state_ops.variable( + shape=[], dtype=dtypes.int32, 
name="step", container="", shared_name="") assign_step = state_ops.assign(step, 0) increment_step = state_ops.assign_add(step, 1) decayed_lr = learning_rate_decay.natural_exp_decay(initial_lr, step, @@ -281,8 +281,8 @@ class ExponentialDecayTest(test_util.TensorFlowTestCase): initial_lr = 0.1 k = 10 decay_rate = 0.96 - step = gen_state_ops._variable(shape=[], dtype=dtypes.int32, - name="step", container="", shared_name="") + step = gen_state_ops.variable( + shape=[], dtype=dtypes.int32, name="step", container="", shared_name="") assign_step = state_ops.assign(step, 0) increment_step = state_ops.assign_add(step, 1) decayed_lr = learning_rate_decay.natural_exp_decay(initial_lr, @@ -304,8 +304,8 @@ class InverseDecayTest(test_util.TensorFlowTestCase): initial_lr = 0.1 k = 10 decay_rate = 0.96 - step = gen_state_ops._variable(shape=[], dtype=dtypes.int32, - name="step", container="", shared_name="") + step = gen_state_ops.variable( + shape=[], dtype=dtypes.int32, name="step", container="", shared_name="") assign_step = state_ops.assign(step, 0) increment_step = state_ops.assign_add(step, 1) decayed_lr = learning_rate_decay.inverse_time_decay(initial_lr, @@ -323,8 +323,8 @@ class InverseDecayTest(test_util.TensorFlowTestCase): initial_lr = 0.1 k = 10 decay_rate = 0.96 - step = gen_state_ops._variable(shape=[], dtype=dtypes.int32, - name="step", container="", shared_name="") + step = gen_state_ops.variable( + shape=[], dtype=dtypes.int32, name="step", container="", shared_name="") assign_step = state_ops.assign(step, 0) increment_step = state_ops.assign_add(step, 1) decayed_lr = learning_rate_decay.inverse_time_decay(initial_lr, diff --git a/tensorflow/python/training/moving_averages_test.py b/tensorflow/python/training/moving_averages_test.py index 6efdeb2866..6717811bbb 100644 --- a/tensorflow/python/training/moving_averages_test.py +++ b/tensorflow/python/training/moving_averages_test.py @@ -376,7 +376,7 @@ class ExponentialMovingAverageTest(test.TestCase): with 
ops.device("/job:dev_v0"): v0 = variables.Variable(10.0, name="v0") with ops.device("/job:dev_v1"): - v1 = gen_state_ops._variable( + v1 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="v1", diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 9afd1e6643..e8ea5abfbd 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -311,8 +311,7 @@ class BaseSaverBuilder(object): Returns: A string tensor. """ - # pylint: disable=protected-access - return gen_io_ops._sharded_filename(filename_tensor, shard, num_shards) + return gen_io_ops.sharded_filename(filename_tensor, shard, num_shards) def _AddSaveOps(self, filename_tensor, saveables): """Add ops to save variables that are on the same shard. @@ -421,8 +420,7 @@ class BaseSaverBuilder(object): sharded_saves.append(self._AddSaveOps(sharded_filename, saveables)) # Return the sharded name for the save path. with ops.control_dependencies([x.op for x in sharded_saves]): - # pylint: disable=protected-access - return gen_io_ops._sharded_filespec(filename_tensor, num_shards_tensor) + return gen_io_ops.sharded_filespec(filename_tensor, num_shards_tensor) def _AddRestoreOps(self, filename_tensor, diff --git a/tensorflow/python/training/saver_test_utils.py b/tensorflow/python/training/saver_test_utils.py index 44b06b357e..0a8b7a09af 100644 --- a/tensorflow/python/training/saver_test_utils.py +++ b/tensorflow/python/training/saver_test_utils.py @@ -35,7 +35,7 @@ class CheckpointedOp(object): # pylint: disable=protected-access def __init__(self, name, table_ref=None): if table_ref is None: - self.table_ref = gen_lookup_ops._mutable_hash_table_v2( + self.table_ref = gen_lookup_ops.mutable_hash_table_v2( key_dtype=dtypes.string, value_dtype=dtypes.float32, name=name) else: self.table_ref = table_ref @@ -57,10 +57,10 @@ class CheckpointedOp(object): return CheckpointedOp.CustomSaveable(self, self.name) def insert(self, keys, values): - return 
gen_lookup_ops._lookup_table_insert_v2(self.table_ref, keys, values) + return gen_lookup_ops.lookup_table_insert_v2(self.table_ref, keys, values) def lookup(self, keys, default): - return gen_lookup_ops._lookup_table_find_v2(self.table_ref, keys, default) + return gen_lookup_ops.lookup_table_find_v2(self.table_ref, keys, default) def keys(self): return self._export()[0] @@ -69,8 +69,8 @@ class CheckpointedOp(object): return self._export()[1] def _export(self): - return gen_lookup_ops._lookup_table_export_v2(self.table_ref, dtypes.string, - dtypes.float32) + return gen_lookup_ops.lookup_table_export_v2(self.table_ref, dtypes.string, + dtypes.float32) class CustomSaveable(saver_module.BaseSaverBuilder.SaveableObject): """A custom saveable for CheckpointedOp.""" @@ -86,6 +86,6 @@ class CheckpointedOp(object): super(CheckpointedOp.CustomSaveable, self).__init__(table, specs, name) def restore(self, restore_tensors, shapes): - return gen_lookup_ops._lookup_table_import_v2( + return gen_lookup_ops.lookup_table_import_v2( self.op.table_ref, restore_tensors[0], restore_tensors[1]) # pylint: enable=protected-access diff --git a/tensorflow/python/user_ops/user_ops.py b/tensorflow/python/user_ops/user_ops.py index 17dbab706c..6f9b5d92bb 100644 --- a/tensorflow/python/user_ops/user_ops.py +++ b/tensorflow/python/user_ops/user_ops.py @@ -27,4 +27,4 @@ from tensorflow.python.ops.gen_user_ops import * # pylint: disable=wildcard-imp def my_fact(): """Example of overriding the generated code for an Op.""" - return _gen_user_ops._fact() # pylint: disable=protected-access + return _gen_user_ops.fact() -- GitLab From 8687aa6f7da68e378d5465914109498f23e300a0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 14:55:50 -0800 Subject: [PATCH 173/311] Remove unnecessary stack operations from graphs. This change implements the first such optimization that removes stack pushes without corresponding pops. 
PiperOrigin-RevId: 187387794 --- tensorflow/core/grappler/op_types.cc | 13 ++++ tensorflow/core/grappler/op_types.h | 4 ++ .../grappler/optimizers/loop_optimizer.cc | 62 ++++++++++++++++++- .../optimizers/loop_optimizer_test.cc | 59 ++++++++++++++++++ .../grappler/optimizers/meta_optimizer.cc | 2 +- tensorflow/core/grappler/utils.cc | 17 +++++ tensorflow/core/grappler/utils.h | 8 +++ 7 files changed, 161 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 9b3755ddce..fb46b584b2 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -300,6 +300,19 @@ bool IsSquaredDifference(const NodeDef& node) { bool IsSqueeze(const NodeDef& node) { return node.op() == "Squeeze"; } +bool IsStackOp(const NodeDef& node) { + return node.op() == "Stack" || node.op() == "StackV2"; +} +bool IsStackCloseOp(const NodeDef& node) { + return node.op() == "StackClose" || node.op() == "StackCloseV2"; +} +bool IsStackPushOp(const NodeDef& node) { + return node.op() == "StackPush" || node.op() == "StackPushV2"; +} +bool IsStackPopOp(const NodeDef& node) { + return node.op() == "StackPop" || node.op() == "StackPopV2"; +} + bool IsStopGradient(const NodeDef& node) { const auto& op = node.op(); return op == "StopGradient" || op == "PreventGradient"; diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 1fa43a9b66..a7c33ef97b 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -118,6 +118,10 @@ bool IsSplitV(const NodeDef& node); bool IsSqrtGrad(const NodeDef& node); bool IsSquaredDifference(const NodeDef& node); bool IsSqueeze(const NodeDef& node); +bool IsStackOp(const NodeDef& node); +bool IsStackCloseOp(const NodeDef& node); +bool IsStackPushOp(const NodeDef& node); +bool IsStackPopOp(const NodeDef& node); bool IsStopGradient(const NodeDef& node); bool IsStridedSlice(const NodeDef& node); bool 
IsStridedSliceGrad(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index 102526e22f..cc226c01db 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -22,20 +22,76 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/strcat.h" namespace tensorflow { namespace grappler { +namespace { -Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, - GraphDef* optimized_graph) { - *optimized_graph = item.graph; +Status RemoveStackOps(const GraphDef& graph, GraphDef* optimized_graph) { + SimpleGraphView graph_view; + TF_RETURN_IF_ERROR(graph_view.Initialize(graph)); + const std::unordered_set op_types_to_traverse( + {"Stack", "StackV2", "Enter", "Switch", "RefSwitch", "Identity"}); + std::set nodes_to_delete; + for (int node_idx = 0; node_idx < graph.node_size(); ++node_idx) { + const NodeDef& node = graph.node(node_idx); + if (IsStackOp(node)) { + std::set nodes_found; + graph_view.DepthFirstSearch(op_types_to_traverse, node_idx, &nodes_found); + bool found_pop = false; + bool found_unexpected = false; + for (int found_idx : nodes_found) { + const NodeDef& node = graph.node(found_idx); + if (IsStackPushOp(node) || IsStackOp(node) || IsStackCloseOp(node)) { + continue; + } else if (IsStackPopOp(node)) { + found_pop = true; + } else { + // Don't modify the graph if we found an unexpected op. There may be + // a pop hiding behind it. 
+ found_unexpected = true; + } + } + if (!found_unexpected && !found_pop) { + VLOG(1) << "Found stack node with no pop: " << node.DebugString(); + // Remove all pushes. + for (int found_idx : nodes_found) { + const NodeDef& node = graph.node(found_idx); + if (IsStackPushOp(node)) { + nodes_to_delete.insert(found_idx); + } + } + } + } + } + *optimized_graph = graph; + if (!nodes_to_delete.empty()) { + int last = optimized_graph->node_size() - 1; + for (auto it = nodes_to_delete.rbegin(); it != nodes_to_delete.rend(); + ++it) { + const int node_to_delete = *it; + optimized_graph->mutable_node()->SwapElements(node_to_delete, last); + --last; + } + optimized_graph->mutable_node()->DeleteSubrange(last + 1, + nodes_to_delete.size()); + } return Status::OK(); } +} // namespace + +Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { + Status status = RemoveStackOps(item.graph, optimized_graph); + return status; +} + void LoopOptimizer::Feedback(Cluster* /*cluster*/, const GrapplerItem& /*item*/, const GraphDef& /*optimized_graph*/, double /*result*/) { diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc index c09434f609..bb2ee6b02b 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc @@ -57,6 +57,65 @@ TEST_F(LoopOptimizerTest, NoOp) { VerifyGraphsEqual(item.graph, output, __FUNCTION__); } +namespace { +NodeDef* AddNode(const string& name, const string& op, + const std::vector& inputs, GraphDef* graph) { + NodeDef* node = graph->add_node(); + node->set_name(name); + node->set_op(op); + for (const string& input : inputs) { + node->add_input(input); + } + return node; +} +} // namespace + +TEST_F(LoopOptimizerTest, RemovePush_NoOp) { + GrapplerItem item; + GraphDef& graph = item.graph; + // Stack with corresponding push/pop. 
+ AddNode("stack1", "StackV2", {}, &graph); + AddNode("push1", "StackPushV2", {"stack1"}, &graph); + AddNode("pop1", "StackPopV2", {"stack1"}, &graph); + // Stack with corresponding push/pop behind Enter. + AddNode("stack2", "StackV2", {}, &graph); + AddNode("push_enter", "Enter", {"stack1"}, &graph); + AddNode("push2", "StackPushV2", {"push_enter"}, &graph); + AddNode("pop_enter", "Enter", {"stack1"}, &graph); + AddNode("pop2", "StackPopV2", {"pop_enter"}, &graph); + // Stack with unexpected op type in fanout of Stack. + AddNode("stack3", "StackV2", {}, &graph); + AddNode("push3", "StackPushV2", {"stack3"}, &graph); + AddNode("stop", "StopGradient", {"stack3"}, &graph); + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + VerifyGraphsEqual(item.graph, output, __FUNCTION__); +} + +TEST_F(LoopOptimizerTest, RemovePushWithoutMatchingPop) { + GrapplerItem item; + GraphDef& graph = item.graph; + AddNode("stack1", "StackV2", {}, &graph); + AddNode("push1", "StackPushV2", {"stack1"}, &graph); + AddNode("stack2", "StackV2", {}, &graph); + AddNode("push_enter", "Enter", {"stack2"}, &graph); + AddNode("push2", "StackPushV2", {"push_enter"}, &graph); + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + EXPECT_EQ(3, output.node_size()); + int found = 0; + for (int i = 0; i < output.node_size(); ++i) { + if (output.node(i).name() == "stack1") ++found; + if (output.node(i).name() == "push_enter") ++found; + if (output.node(i).name() == "stack2") ++found; + } + EXPECT_EQ(3, found); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 93658a6475..b674ee1553 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ 
b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -110,7 +110,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new DependencyOptimizer(cfg_.dependency_optimization()))); } - if (cfg_.loop_optimization() != RewriterConfig::OFF) { + if (cfg_.loop_optimization() == RewriterConfig::ON) { optimizers.push_back(std::unique_ptr( new LoopOptimizer(cfg_.loop_optimization()))); } diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 81bb5e6c3b..a611a93086 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -348,6 +348,7 @@ inline void STLSortAndRemoveDuplicates(T* v) { Status SimpleGraphView::Initialize(const GraphDef& graph, bool dedup_inputs, bool dedup_outputs) { + graph_ = &graph; const int num_nodes = graph.node_size(); inputs_.clear(); inputs_.resize(num_nodes); @@ -394,6 +395,22 @@ Status SimpleGraphView::Initialize(const GraphDef& graph, bool dedup_inputs, return Status::OK(); } +void SimpleGraphView::DepthFirstSearch( + const std::unordered_set& op_types_to_traverse, int node_idx, + std::set* nodes_found) const { + const NodeDef& node = graph_->node(node_idx); + if (op_types_to_traverse.find(node.op()) == op_types_to_traverse.end()) { + nodes_found->insert(node_idx); + return; + } + if (nodes_found->find(node_idx) != nodes_found->end()) { + return; + } + for (auto output_idx : this->outputs(node_idx)) { + DepthFirstSearch(op_types_to_traverse, output_idx, nodes_found); + } +} + string SimpleGraphView::PrintToString() const { string str; for (int i = 0; i < num_nodes(); ++i) { diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index 255319693a..1b91a57154 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -194,9 +194,17 @@ class SimpleGraphView { return outputs_[node_idx]; } + // Traverse the graph starting at `node_idx`, collecting indices of nodes + // 
visited in nodes_found. If a node has an op in `op_types_to_traverse`, the + // walk continues to its children. It is assumed that *graph_ was not modified + // after the call to Initialize(). + void DepthFirstSearch(const std::unordered_set& op_types_to_traverse, + int node_idx, std::set* nodes_found) const; + string PrintToString() const; private: + const GraphDef* graph_; // Not owned. std::vector index_to_name_; std::unordered_map name_to_index_; std::vector> inputs_; -- GitLab From d3c8659b27c644268156d15ec4b556e60db21491 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Wed, 28 Feb 2018 15:18:29 -0800 Subject: [PATCH 174/311] keras: Avoid unneccesary call to .call() when building models with subclassing. This fixes a regression in the defun microbenchmarks (ResNet50Benchmarks.eager_train_with_defun_gpu_batch_32_channels_first etc.) in tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py seen after https://github.com/tensorflow/tensorflow/commit/9a84277be2cb8233c5c14270db6fcdff31ab4d93 (which embeds a model in model) Without this change, converting a model call to a graph function using something like: model.call = tfe.defun(model.call) could result in redundant nodes being added to the graph function as the model._set_inputs() call would invoke model.call() again. 
PiperOrigin-RevId: 187391494 --- .../keras/_impl/keras/engine/base_layer.py | 7 +++-- .../keras/_impl/keras/engine/training.py | 28 +++++++++++-------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/engine/base_layer.py b/tensorflow/python/keras/_impl/keras/engine/base_layer.py index 142325041b..7f215f5645 100644 --- a/tensorflow/python/keras/_impl/keras/engine/base_layer.py +++ b/tensorflow/python/keras/_impl/keras/engine/base_layer.py @@ -240,9 +240,10 @@ class Layer(tf_base_layers.Layer): if context.in_eager_mode(): return output - # Un-built subclassed network: build it - if hasattr(self, '_set_inputs') and not self.inputs: - self._set_inputs(inputs, training=kwargs.get('training')) + if hasattr(self, '_symbolic_set_inputs') and not self.inputs: + # Subclassed network: explicitly set metadata normally set by a call to + # self._set_inputs(). + self._symbolic_set_inputs(inputs, output) # Update learning phase info. output_tensors = generic_utils.to_list(output) diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 63bea08ac5..c121d819ff 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -1835,14 +1835,17 @@ class Model(Network): 'output_%d' % (i + 1) for i in range(len(dummy_output_values))] self.built = True - def _symbolic_set_inputs(self, inputs, training=None): - """Set model's inputs based on the input data received from the user. + def _symbolic_set_inputs(self, inputs, outputs=None, training=None): + """Set model's inputs and output specs based. This is to be used for Model subclasses, which do not know at instantiation time what their inputs look like. Args: inputs: Argument `x` (input data) passed by the user upon first model use. + outputs: None, a data tensor, or a list of data tensors. 
If None, the + outputs will be determined by invoking self.call(), otherwise the + provided value will be used. training: Boolean or None. Only relevant in symbolic mode. Specifies whether to build the model's graph in inference mode (False), training mode (True), or using the Keras learning phase (None). @@ -1892,17 +1895,18 @@ class Model(Network): self._feed_input_names.append(name) self._feed_input_shapes.append(K.int_shape(v)) - # Obtain symbolic outputs by calling the model. - if len(self.inputs) == 1: - if self._expects_training_arg: - outputs = self.call(self.inputs[0], training=training) - else: - outputs = self.call(self.inputs[0]) - else: - if self._expects_training_arg: - outputs = self.call(self.inputs, training=training) + if outputs is None: + # Obtain symbolic outputs by calling the model. + if len(self.inputs) == 1: + if self._expects_training_arg: + outputs = self.call(self.inputs[0], training=training) + else: + outputs = self.call(self.inputs[0]) else: - outputs = self.call(self.inputs) + if self._expects_training_arg: + outputs = self.call(self.inputs, training=training) + else: + outputs = self.call(self.inputs) if isinstance(outputs, (list, tuple)): outputs = list(outputs) else: -- GitLab From 656055e0c9acd944b7a34bfe01c06ad122f87da8 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Wed, 28 Feb 2018 15:36:39 -0800 Subject: [PATCH 175/311] Exclude more tests for cuda_on_cpu project. 
PiperOrigin-RevId: 187394209 --- tensorflow/core/grappler/optimizers/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 7b801db2c8..b8995ef365 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -382,6 +382,7 @@ cc_library( tf_cc_test_gpu( name = "memory_optimizer_test", srcs = ["memory_optimizer_test.cc"], + tags = ["no_cuda_on_cpu_tap"], deps = [ ":memory_optimizer", "//tensorflow/cc:cc_ops", -- GitLab From b98a1f31bca1e773ee215f2c32aa0509843c1247 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 15:44:55 -0800 Subject: [PATCH 176/311] Propagate NaNs for floating point min/max operations. PiperOrigin-RevId: 187395444 --- .../compiler/xla/service/hlo_evaluator.cc | 39 ++++++++--- .../compiler/xla/service/llvm_ir/llvm_util.cc | 12 ++-- .../xla/tests/array_elementwise_ops_test.cc | 70 +++---------------- .../xla/tests/scalar_computations_test.cc | 12 ++++ 4 files changed, 59 insertions(+), 74 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index edb1ad2360..42de7ada61 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -613,14 +613,25 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> + template ::value>::type* = + nullptr> + Status HandleMaximum(HloInstruction* maximum) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[maximum], + ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) { + return std::max(lhs, rhs); + })); + return Status::OK(); + } + + template ::value>::type* = nullptr> Status HandleMaximum(HloInstruction* maximum) { TF_ASSIGN_OR_RETURN( parent_->evaluated_[maximum], 
ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) { - return std::fmax(lhs, rhs); + return ((lhs >= rhs) || std::isnan(lhs)) ? lhs : rhs; })); return Status::OK(); } @@ -636,18 +647,30 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return HandleMaximum(maximum); } - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> + template ::value>::type* = + nullptr> Status HandleMinimum(HloInstruction* minimum) { TF_ASSIGN_OR_RETURN(parent_->evaluated_[minimum], ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { - return std::fmin(lhs_el, rhs_el); + return std::min(lhs_el, rhs_el); })); return Status::OK(); } + template ::value>::type* = nullptr> + Status HandleMinimum(HloInstruction* minimum) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[minimum], + ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el, + ElementwiseT rhs_el) { + return ((lhs_el <= rhs_el) || std::isnan(lhs_el)) ? lhs_el : rhs_el; + })); + return Status::OK(); + } + template < typename NativeT, typename std::enable_if::value>::type* = nullptr> diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 5c1866311d..2a282f3be7 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -106,8 +106,10 @@ llvm::Value* EmitFloatMax(llvm::Value* lhs_value, llvm::Value* rhs_value, auto cmp = ir_builder->CreateFCmpUGE(lhs_value, rhs_value); return ir_builder->CreateSelect(cmp, lhs_value, rhs_value); } else { - return EmitCallToIntrinsic(llvm::Intrinsic::maxnum, {lhs_value, rhs_value}, - {lhs_value->getType()}, ir_builder); + auto cmp_ge = ir_builder->CreateFCmpOGE(lhs_value, rhs_value); + auto lhs_is_nan = ir_builder->CreateFCmpUNE(lhs_value, lhs_value); + auto sel_lhs = ir_builder->CreateOr(cmp_ge, lhs_is_nan); + return ir_builder->CreateSelect(sel_lhs, lhs_value, rhs_value); } } @@ 
-117,8 +119,10 @@ llvm::Value* EmitFloatMin(llvm::Value* lhs_value, llvm::Value* rhs_value, auto cmp = ir_builder->CreateFCmpULE(lhs_value, rhs_value); return ir_builder->CreateSelect(cmp, lhs_value, rhs_value); } else { - return EmitCallToIntrinsic(llvm::Intrinsic::minnum, {lhs_value, rhs_value}, - {lhs_value->getType()}, ir_builder); + auto cmp_le = ir_builder->CreateFCmpOLE(lhs_value, rhs_value); + auto lhs_is_nan = ir_builder->CreateFCmpUNE(lhs_value, lhs_value); + auto sel_lhs = ir_builder->CreateOr(cmp_le, lhs_is_nan); + return ir_builder->CreateSelect(sel_lhs, lhs_value, rhs_value); } } diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 8b35259013..6e21dda25d 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -1648,33 +1648,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, SquareIn4DZeroElements) { ComputeAndCompareR4(&builder, expected, {}, error_spec_); } -// GPU backend emits nvvm intrinsic for fmin and fmax, whose semantics is NOT -// such -// * fmin(NaN, x) = x -// * fmax(NaN, x) = x -// so we only test NAN on CPU. -// -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. 
XLA_TEST_F(ArrayElementwiseOpTest, MinF32s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f}); - auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f, 10.0f, NAN}); -#endif auto minimum = builder.Min(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {1.0f, -5.0f, 1.0f}, -#else - {1.0f, -5.0f, 1.0f, 10.0f, 6.0f}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {1.0f, -5.0f, 1.0f, NAN, NAN}, {}, + error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MinZeroElementF32s) { @@ -1685,50 +1667,26 @@ XLA_TEST_F(ArrayElementwiseOpTest, MinZeroElementF32s) { ComputeAndCompareR1(&builder, {}, {}, error_spec_); } -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. See comment on MinF32s test above. XLA_TEST_F(ArrayElementwiseOpTest, MinF64s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0, 1.0, 2.25}); - auto rhs = builder.ConstantR1({2.0, -5.0, 1.0}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0, 1.0, 2.25, NAN, 6.0}); auto rhs = builder.ConstantR1({2.0, -5.0, 1.0, 10.0, NAN}); -#endif auto minimum = builder.Min(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {1.0, -5.0, 1.0}, -#else - {1.0, -5.0, 1.0, 10.0, 6.0}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {1.0, -5.0, 1.0, NAN, NAN}, {}, + error_spec_); } -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. See comment on MinF32s test above. 
XLA_TEST_F(ArrayElementwiseOpTest, MaxF32s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f}); - auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f, 10.0f, NAN}); -#endif auto maximum = builder.Max(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {2.0f, 1.0f, 2.25f}, -#else - {2.0f, 1.0f, 2.25f, 10.0f, 6.0f}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {2.0f, 1.0f, 2.25f, NAN, NAN}, {}, + error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MaxZeroElementF32s) { @@ -1739,27 +1697,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxZeroElementF32s) { ComputeAndCompareR1(&builder, {}, {}, error_spec_); } -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. See comment on MinF32s test above. 
XLA_TEST_F(ArrayElementwiseOpTest, MaxF64s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0, 1.0, 2.25}); - auto rhs = builder.ConstantR1({2.0, -5.0, 1.0}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0, 1.0, 2.25, NAN, 6.0}); auto rhs = builder.ConstantR1({2.0, -5.0, 1.0, 10.0, NAN}); -#endif auto maximum = builder.Max(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {2.0, 1.0, 2.25}, -#else - {2.0, 1.0, 2.25, 10.0, 6.0}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {2.0, 1.0, 2.25, NAN, NAN}, {}, + error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MaxS32s) { diff --git a/tensorflow/compiler/xla/tests/scalar_computations_test.cc b/tensorflow/compiler/xla/tests/scalar_computations_test.cc index d7bda77e87..0c88bef69d 100644 --- a/tensorflow/compiler/xla/tests/scalar_computations_test.cc +++ b/tensorflow/compiler/xla/tests/scalar_computations_test.cc @@ -860,6 +860,12 @@ XLA_TEST_F(ScalarComputationsTest, MinF32Below) { TestMinMax(-100.1f, 3.1f, -100.1f, &ComputationBuilder::Min); } +XLA_TEST_F(ScalarComputationsTest, MinPropagatesNan) { + SetFastMathDisabled(true); + TestMinMax(NAN, 3.1f, NAN, &ComputationBuilder::Min); + TestMinMax(-3.1f, NAN, NAN, &ComputationBuilder::Min); +} + XLA_TEST_F(ScalarComputationsTest, MaxF32Above) { TestMinMax(10.1f, 3.1f, 10.1f, &ComputationBuilder::Max); } @@ -868,6 +874,12 @@ XLA_TEST_F(ScalarComputationsTest, MaxF32Below) { TestMinMax(-100.1f, 3.1f, 3.1f, &ComputationBuilder::Max); } +XLA_TEST_F(ScalarComputationsTest, MaxPropagatesNan) { + SetFastMathDisabled(true); + TestMinMax(NAN, 3.1f, NAN, &ComputationBuilder::Max); + TestMinMax(-3.1f, NAN, NAN, &ComputationBuilder::Max); +} + XLA_TEST_F(ScalarComputationsTest, ComplicatedArithmeticExpressionF32) { // Compute the expression (1 * (3 - 1) * (7 + 0) - 4) / 20. 
ComputationBuilder b(client_, TestName()); -- GitLab From 9a52edb4760f13dda1b27f9126f8117d6c4f9bc9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 15:47:01 -0800 Subject: [PATCH 177/311] Update a few tests to work with Grappler constant folding. PiperOrigin-RevId: 187395886 --- tensorflow/python/kernel_tests/pooling_ops_test.py | 8 ++++++-- tensorflow/python/kernel_tests/reduction_ops_test.py | 4 +++- tensorflow/python/kernel_tests/softmax_op_test.py | 3 +-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index a0ac355b60..2f3bea5825 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -31,6 +31,7 @@ from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import variables import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging @@ -731,7 +732,8 @@ class PoolingTest(test.TestCase): [1, 1, 1, 3], "evenly divide") if test.is_gpu_available(): with self.test_session(use_gpu=True): - t = constant_op.constant(1.0, shape=[1, 2, 2, 4]) + t = variables.Variable(np.ones([1, 2, 2, 4])) + variables.global_variables_initializer().run() with self.assertRaisesOpError("for CPU devices"): nn_ops.max_pool( t, ksize=[1, 1, 1, 2], strides=[1, 1, 1, 2], @@ -1210,7 +1212,9 @@ class PoolingTest(test.TestCase): padding, use_gpu, v2): pool_func = gen_nn_ops.max_pool_v2 if v2 else nn_ops.max_pool with self.test_session(use_gpu=use_gpu): - input_tensor = constant_op.constant(input_data, shape=input_sizes) + input_tensor = variables.Variable( + np.array(input_data, dtype=np.float32).reshape(input_sizes)) + 
variables.global_variables_initializer().run() output_tensor = pool_func(input_tensor, [1, window_rows, window_cols, 1], [1, row_stride, col_stride, 1], padding) output_backprop_tensor = constant_op.constant( diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py index d306d1b8d6..589ea54973 100644 --- a/tensorflow/python/kernel_tests/reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/reduction_ops_test.py @@ -30,6 +30,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables from tensorflow.python.platform import test # The maximum input rank to test. @@ -212,7 +213,8 @@ class SumReductionTest(BaseReductionTest): arr = np.ones([68000], dtype=np.float16) with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: - tf_arr = array_ops.constant(arr) + tf_arr = variables.Variable(arr) + variables.global_variables_initializer().run() tf_mean = math_ops.reduce_mean(tf_arr, 0, False) tf_out_mean = sess.run(tf_mean) self.assertAllClose(tf_out_mean, 1.) 
diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py index 4d89831aae..2b8e99e18e 100644 --- a/tensorflow/python/kernel_tests/softmax_op_test.py +++ b/tensorflow/python/kernel_tests/softmax_op_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util @@ -166,7 +165,7 @@ class SoftmaxTest(test.TestCase): def testEmptyInput(self): with self.test_session(): - x = constant_op.constant([[]], shape=[0, 3]) + x = array_ops.placeholder(dtypes.float32, shape=[0, 3]) self.assertEqual(0, array_ops.size(x).eval()) # reshape would raise if logits is empty with self.assertRaises(errors_impl.InvalidArgumentError): -- GitLab From 8be4ab7b2d2ad00ffa84da82e9cbba88c677877d Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 28 Feb 2018 15:51:18 -0800 Subject: [PATCH 178/311] Add all_files target to gcs_smoke_test BUILD file. 
PiperOrigin-RevId: 187396477 --- tensorflow/BUILD | 1 + .../integration_tests/gcs_smoke_test/BUILD.bazel | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index a4e7602bea..4b2facd6b3 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -674,6 +674,7 @@ filegroup( "//tensorflow/tools/docs:all_files", "//tensorflow/tools/git:all_files", "//tensorflow/tools/graph_transforms:all_files", + "//tensorflow/tools/integration_tests/gcs_smoke_test:all_files", "//tensorflow/tools/mlpbtxt:all_files", "//tensorflow/tools/proto_text:all_files", "//tensorflow/tools/quantization:all_files", diff --git a/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel b/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel index 439d86c5d2..0acc139df9 100755 --- a/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel +++ b/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel @@ -54,3 +54,14 @@ integration_test( test_docker_image = toolchain_container_images()["tensorflow"], test_type = "MultiMachine", ) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), +) -- GitLab From a5b336194f4fd1a26bcd5dfd159d6edf4dfdd081 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Wed, 28 Feb 2018 15:59:33 -0800 Subject: [PATCH 179/311] Remove record_gradient param from benchmark function PiperOrigin-RevId: 187397610 --- tensorflow/python/eager/benchmarks_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 228ff62b20..527a919ab0 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -35,7 +35,6 @@ from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import backprop # pylint: disable=unused-import from tensorflow.python.eager import context from tensorflow.python.eager 
import core -from tensorflow.python.eager import execute from tensorflow.python.eager import function from tensorflow.python.eager import test from tensorflow.python.framework import dtypes @@ -60,7 +59,7 @@ def c_tfe_py_fastpath_execute(a, ), "The prototype doesn't contain C code for graph construction" try: return pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "MatMul", execute.record_gradient, name, + ctx._handle, ctx.device_name, "MatMul", name, ctx._post_execution_callbacks, a, b, "transpose_a", transpose_a, "transpose_b", transpose_b) except core._NotOkStatusException as e: -- GitLab From e670c81d85f3353ea3b701569f8f5126714a02bf Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 28 Feb 2018 16:22:42 -0800 Subject: [PATCH 180/311] GCS: HTTP error code 308 retries during upload. Previously, it would only permit 308 when getting the status of an upload. This matches the behavior of the official library: https://github.com/google/apitools/blob/master/apitools/base/py/transfer.py#L925 And the general description here: https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload PiperOrigin-RevId: 187400843 --- .../core/platform/cloud/curl_http_request.cc | 8 +++-- .../platform/cloud/gcs_file_system_test.cc | 33 +++++++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index 88a5d1e96d..4b5f6974c1 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -493,14 +493,18 @@ Status CurlHttpRequest::Send() { case 303: // See Other case 304: // Not Modified case 307: // Temporary Redirect - case 308: // Resume Incomplete case 412: // Precondition Failed case 413: // Payload Too Large result = errors::FailedPrecondition(error_message); break; // UNAVAILABLE indicates a problem that can go away if the request - // is just retried 
without any modification. + // is just retried without any modification. 308 return codes are intended + // for write requests that can be retried. See the documentation and the + // official library: + // https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload + // https://github.com/google/apitools/blob/master/apitools/base/py/transfer.py + case 308: // Resume Incomplete case 409: // Conflict case 429: // Too Many Requests case 500: // Internal Server Error diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index d452074ce3..cd9fd3adea 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -393,7 +393,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) { "Timeouts: 5 1 10\n" "Header Content-Range: bytes */17\n" "Put: yes\n", - "", errors::FailedPrecondition("308"), nullptr, + "", errors::Unavailable("308"), nullptr, {{"Range", "0-10"}}, 308), new FakeHttpRequest("Uri: https://custom/upload/location\n" "Auth Token: fake_token\n" @@ -406,13 +406,26 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) { "Timeouts: 5 1 10\n" "Header Content-Range: bytes */17\n" "Put: yes\n", - "", errors::FailedPrecondition("308"), nullptr, + "", errors::Unavailable("308"), nullptr, {{"Range", "bytes=0-12"}}, 308), new FakeHttpRequest("Uri: https://custom/upload/location\n" "Auth Token: fake_token\n" "Header Content-Range: bytes 13-16/17\n" "Timeouts: 5 1 30\n" "Put body: ent2\n", + "", errors::Unavailable("308"), 308), + new FakeHttpRequest("Uri: https://custom/upload/location\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n" + "Header Content-Range: bytes */17\n" + "Put: yes\n", + "", errors::Unavailable("308"), nullptr, + {{"Range", "bytes=0-14"}}, 308), + new FakeHttpRequest("Uri: https://custom/upload/location\n" + "Auth Token: fake_token\n" + "Header Content-Range: bytes 
15-16/17\n" + "Timeouts: 5 1 30\n" + "Put body: t2\n", "")}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( @@ -521,14 +534,14 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) { "Put body: content1,content2\n", "", errors::Unavailable("503"), 503)}); for (int i = 0; i < 10; i++) { - requests.emplace_back(new FakeHttpRequest( - "Uri: https://custom/upload/location\n" - "Auth Token: fake_token\n" - "Timeouts: 5 1 10\n" - "Header Content-Range: bytes */17\n" - "Put: yes\n", - "", errors::FailedPrecondition("important HTTP error 308"), nullptr, - {{"Range", "0-10"}}, 308)); + requests.emplace_back( + new FakeHttpRequest("Uri: https://custom/upload/location\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n" + "Header Content-Range: bytes */17\n" + "Put: yes\n", + "", errors::Unavailable("important HTTP error 308"), + nullptr, {{"Range", "0-10"}}, 308)); requests.emplace_back(new FakeHttpRequest( "Uri: https://custom/upload/location\n" "Auth Token: fake_token\n" -- GitLab From 86061c8e8034c5bee955659bdda8366f640f543d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 16:41:17 -0800 Subject: [PATCH 181/311] Adding the documentation for building the iOS demo for TensorFlow Lite. 
PiperOrigin-RevId: 187403346 --- tensorflow/docs_src/mobile/leftnav_files | 1 + tensorflow/docs_src/mobile/tflite/demo_ios.md | 68 +++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 tensorflow/docs_src/mobile/tflite/demo_ios.md diff --git a/tensorflow/docs_src/mobile/leftnav_files b/tensorflow/docs_src/mobile/leftnav_files index ac50f528ba..4cf134cc3c 100644 --- a/tensorflow/docs_src/mobile/leftnav_files +++ b/tensorflow/docs_src/mobile/leftnav_files @@ -2,6 +2,7 @@ index.md ### TensorFlow Lite tflite/index.md tflite/demo_android.md +tflite/demo_ios.md >>> ### TensorFlow Mobile mobile_intro.md diff --git a/tensorflow/docs_src/mobile/tflite/demo_ios.md b/tensorflow/docs_src/mobile/tflite/demo_ios.md new file mode 100644 index 0000000000..3ee9b1cbca --- /dev/null +++ b/tensorflow/docs_src/mobile/tflite/demo_ios.md @@ -0,0 +1,68 @@ +# TensorFlow Lite Demo for iOS + +The TensorFlow Lite demo is a camera app that continuously classifies whatever +it sees from your device's back camera, using a quantized MobileNet model. These +instructions walk you through building and running the demo on an iOS device. + +## Prerequisites + +* You must have [Xcode](https://developer.apple.com/xcode/) installed and have a + valid Apple Developer ID, and have an iOS device set up and linked to your + developer account with all of the appropriate certificates. For these + instructions, we assume that you have already been able to build and deploy an + app to an iOS device with your current developer environment. + +* The demo app requires a camera and must be executed on a real iOS device. You + can build it and run with the iPhone Simulator but it won't have any camera + information to classify. 
+ +* You don't need to build the entire TensorFlow library to run the demo, but you + will need to clone the TensorFlow repository if you haven't already: + + git clone https://github.com/tensorflow/tensorflow + +* You'll also need the Xcode command-line tools: + + xcode-select --install + + If this is a new install, you will need to run the Xcode application once to + agree to the license before continuing. + +## Building the iOS Demo App + +1. Install CocoaPods if you don't have it: + + sudo gem install cocoapods + +2. Download the model files used by the demo app (this is done from inside the + cloned directory): + + sh tensorflow/contrib/lite/examples/ios/download_models.sh + +3. Install the pod to generate the workspace file: + + cd tensorflow/contrib/lite/examples/ios/camera + pod install + + If you have installed this pod before and that command doesn't work, try + + pod update + + At the end of this step you should have a file called + `tflite_camera_example.xcworkspace`. + +4. Open the project in Xcode by typing this on the command line: + + open tflite_camera_example.xcworkspace + + This launches Xcode if it isn't open already and opens the + `tflite_camera_example` project. + +5. Build and run the app in Xcode. + + Note that as mentioned earlier, you must already have a device set up and + linked to your Apple Developer account in order to deploy the app on a + device. + +You'll have to grant permissions for the app to use the device's camera. Point +the camera at various objects and enjoy seeing how the model classifies things! -- GitLab From 6a2bb85654655d7dc6e5017de6586e76634ebcd1 Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Wed, 28 Feb 2018 17:18:52 -0800 Subject: [PATCH 182/311] Docs: Update Fixed Point Quantization in performance. 
PiperOrigin-RevId: 187408106 --- tensorflow/docs_src/performance/leftnav_files | 4 +- .../docs_src/performance/quantization.md | 461 +++++++++--------- 2 files changed, 245 insertions(+), 220 deletions(-) diff --git a/tensorflow/docs_src/performance/leftnav_files b/tensorflow/docs_src/performance/leftnav_files index 316f023f43..d11a7e5d07 100644 --- a/tensorflow/docs_src/performance/leftnav_files +++ b/tensorflow/docs_src/performance/leftnav_files @@ -2,6 +2,7 @@ performance_guide.md datasets_performance.md performance_models.md benchmarks.md +quantization.md ### XLA xla/index.md @@ -11,6 +12,3 @@ xla/jit.md xla/operation_semantics.md xla/shapes.md xla/tfcompile.md - -### Quantization -quantization.md diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md index 544274cab6..63448c2ebe 100644 --- a/tensorflow/docs_src/performance/quantization.md +++ b/tensorflow/docs_src/performance/quantization.md @@ -1,226 +1,253 @@ -# How to Quantize Neural Networks with TensorFlow - -When modern neural networks were being developed, the biggest challenge was -getting them to work at all! That meant that accuracy and speed during training -were the top priorities. Using floating point arithmetic was the easiest way to -preserve accuracy, and GPUs were well-equipped to accelerate those calculations, -so it's natural that not much attention was paid to other numerical formats. - -These days, we actually have a lot of models being deployed in commercial -applications. The computation demands of training grow with the number of -researchers, but the cycles needed for inference expand in proportion to users. -That means pure inference efficiency has become a burning issue for a lot of -teams. - -That is where quantization comes in. It's an umbrella term that covers a lot of -different techniques to store numbers and perform calculations on them in more -compact formats than 32-bit floating point. 
I am going to focus on eight-bit -fixed point, for reasons I'll go into more detail on later. - -[TOC] - -## Why does Quantization Work? - -Training neural networks is done by applying many tiny nudges to the weights, -and these small increments typically need floating point precision to work -(though there are research efforts to use quantized representations here too). - -Taking a pre-trained model and running inference is very different. One of the -magical qualities of deep networks is that they tend to cope very well with high -levels of noise in their inputs. If you think about recognizing an object in a -photo you've just taken, the network has to ignore all the CCD noise, lighting -changes, and other non-essential differences between it and the training -examples it's seen before, and focus on the important similarities instead. This -ability means that they seem to treat low-precision calculations as just another -source of noise, and still produce accurate results even with numerical formats -that hold less information. - -## Why Quantize? - -Neural network models can take up a lot of space on disk, with the original -AlexNet being over 200 MB in float format for example. Almost all of that size -is taken up with the weights for the neural connections, since there are often -many millions of these in a single model. Because they're all slightly different -floating point numbers, simple compression formats like zip don't compress them -well. They are arranged in large layers though, and within each layer the -weights tend to be normally distributed within a certain range, for example -3.0 -to 6.0. - -The simplest motivation for quantization is to shrink file sizes by storing the -min and max for each layer, and then compressing each float value to an -eight-bit integer representing the closest real number in a linear set of 256 -within the range. 
For example with the -3.0 to 6.0 range, a 0 byte would -represent -3.0, a 255 would stand for 6.0, and 128 would represent about 1.5. -I'll go into the exact calculations later, since there's some subtleties, but -this means you can get the benefit of a file on disk that's shrunk by 75%, and -then convert back to float after loading so that your existing floating-point -code can work without any changes. - -Another reason to quantize is to reduce the computational resources you need to -do the inference calculations, by running them entirely with eight-bit inputs -and outputs. This is a lot more difficult since it requires changes everywhere -you do calculations, but offers a lot of potential rewards. Fetching eight-bit -values only requires 25% of the memory bandwidth of floats, so you'll make much -better use of caches and avoid bottlenecking on RAM access. You can also -typically use SIMD operations that do many more operations per clock cycle. In -some case you'll have a DSP chip available that can accelerate eight-bit -calculations too, which can offer a lot of advantages. - -Moving calculations over to eight bit will help you run your models faster, and -use less power (which is especially important on mobile devices). It also opens -the door to a lot of embedded systems that can't run floating point code -efficiently, so it can enable a lot of applications in the IoT world. - -## Why Not Train in Lower Precision Directly? - -There have been some experiments training at lower bit depths, but the results -seem to indicate that you need higher than eight bit to handle the back -propagation and gradients. That makes implementing the training more -complicated, and so starting with inference made sense. We also already have a -lot of float models already that we use and know well, so being able to convert -them directly is very convenient. - -## How Can You Quantize Your Models? - -TensorFlow has production-grade support for eight-bit calculations built in. 
It -also has a process for converting many models trained in floating-point over to -equivalent graphs using quantized calculations for inference. For example, -here's how you can translate the latest GoogLeNet model into a version that uses -eight-bit computations: - -```sh -curl -L "https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz" | - tar -C tensorflow/examples/label_image/data -xz -bazel build tensorflow/tools/graph_transforms:transform_graph -bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=tensorflow/examples/label_image/data/inception_v3_2016_08_28_frozen.pb \ - --out_graph=/tmp/quantized_graph.pb \ - --inputs=input \ - --outputs=InceptionV3/Predictions/Reshape_1 \ - --transforms='add_default_attributes strip_unused_nodes(type=float, shape="1,299,299,3") - remove_nodes(op=Identity, op=CheckNumerics) fold_constants(ignore_errors=true) - fold_batch_norms fold_old_batch_norms quantize_weights quantize_nodes - strip_unused_nodes sort_by_execution_order' +# Fixed Point Quantization + +Quantization techniques store and calculate numbers in more compact formats. +[TensorFlow Lite](/mobile/tflite/) adds quantization that uses an 8-bit fixed +point representation. + +Since a challenge for modern neural networks is optimizing for high accuracy, the +priority has been improving accuracy and speed during training. Using floating +point arithmetic is an easy way to preserve accuracy and GPUs are designed to +accelerate these calculations. + +However, as more machine learning models are deployed to mobile devices, +inference efficiency has become a critical issue. Where the computational demand +for *training* grows with the amount of models trained on different +architectures, the computational demand for *inference* grows in proportion to +the amount of users. + +## Quantization benefits + + +Using 8-bit calculations help your models run faster and use less power. 
This is +especially important for mobile devices and embedded applications that can't run +floating point code efficiently, for example, Internet of Things (IoT) and +robotics devices. There are additional opportunities to extend this support to +more backends and research lower precision networks. + +### Smaller file sizes {: .hide-from-toc} + +Neural network models require a lot of space on disk. For example, the original +AlexNet requires over 200 MB for the float format—almost all of that for the +model's millions of weights. Because the weights are slightly different +floating point numbers, simple compression formats perform poorly (like zip). + +Weights fall in large layers of numerical values. For each layer, weights tend to +be normally distributed within a range. Quantization can shrink file sizes by +storing the minimum and maximum weight for each layer, then compress each +weight's float value to an 8-bit integer representing the closest real number in +a linear set of 256 within the range. + +### Faster inference {: .hide-from-toc} + +Since calculations are run entirely on 8-bit inputs and outputs, quantization +reduces the computational resources needed for inference calculations. This is +more involved, requiring changes to all floating point calculations, but results +in a large speed-up for inference time. + +### Memory efficiency {: .hide-from-toc} + +Since fetching 8-bit values only requires 25% of the memory bandwidth of floats, +more efficient caches avoid bottlenecks for RAM access. In many cases, the power +consumption for running a neural network is dominated by memory access. The +savings from using fixed-point 8-bit weights and activations are significant. + +Typically, SIMD operations are available that run more operations per clock +cycle. In some cases, a DSP chip is available that accelerates 8-bit calculations +resulting in a massive speedup. 
+ +## Fixed point quantization techniques + +The goal is to use the same precision for weights and activations during both +training and inference. But an important difference is that training consists of +a forward pass and a backward pass, while inference only uses a forward pass. +When we train the model with quantization in the loop, we ensure that the forward +pass matches precision for both training and inference. + +To minimize the loss in accuracy for fully fixed point models (weights and +activations), train the model with quantization in the loop. This simulates +quantization in the forward pass of a model so weights tend towards values that +perform better during quantized inference. The backward pass uses quantized +weights and activations and models quantization as a straight through estimator. +(See Bengio et al., [2013](https://arxiv.org/abs/1308.3432)) + +Additionally, the minimum and maximum values for activations are determined +during training. This allows a model trained with quantization in the loop to be +converted to a fixed point inference model with little effort, eliminating the +need for a separate calibration step. + +## Quantization training with TensorFlow + +TensorFlow can train models with quantization in the loop. Because training +requires small gradient adjustments, floating point values are still used. To +keep models as floating point while adding the quantization error in the training +loop, @{$array_ops#Fake_quantization} nodes simulate the effect of quantization +in the forward and backward passes. + +Since it's difficult to add these fake quantization operations to all the +required locations in the model, there's a function available that rewrites the +training graph. To create a fake quantized training graph: + +``` +# Build forward pass of model. +loss = tf.losses.get_total_loss() + +# Call the training rewrite which rewrites the graph in-place with +# FakeQuantization nodes and folds batchnorm for training. 
It is +# often needed to fine tune a floating point model for quantization +# with this training tool. When training from scratch, quant_delay +# can be used to activate quantization after training to converge +# with the float graph, effectively fine-tuning the model. +tf.contrib.quantize.create_training_graph(quant_delay=2000000) + +# Call backward pass optimizer as usual. +optimizer = tf.train.GradientDescentOptimizer(learning_rate) +optimizer.minimize(loss) ``` -This will produce a new model that runs the same operations as the original, but -with eight bit calculations internally, and all weights quantized as well. If -you look at the file size, you'll see it's about a quarter of the original (23MB -versus 91MB). You can still run this model using exactly the same inputs and -outputs though, and you should get equivalent results. Here's an example: +The rewritten *eval graph* is non-trivially different from the *training graph* +since the quantization ops affect the batch normalization step. Because of this, +we've added a separate rewrite for the *eval graph*: -```sh -bazel build tensorflow/examples/label_image:label_image -bazel-bin/tensorflow/examples/label_image/label_image \ ---graph=/tmp/quantized_graph.pb \ +``` +# Build eval model +logits = tf.nn.softmax_cross_entropy_with_logits(...) + +# Call the eval rewrite which rewrites the graph in-place with +# FakeQuantization nodes and folds batchnorm for eval. +tf.contrib.quantize.create_eval_graph() + +# Save the checkpoint and eval graph proto to disk for freezing +# and providing to TFLite. +with open(eval_graph_file, 'w') as f: + f.write(str(g.as_graph_def())) +saver = tf.train.Saver() +saver.save(sess, checkpoint_name) +``` + +Methods to rewrite the training and eval graphs are an active area of research +and experimentation. Although rewrites and quantized training might not work or +improve performance for all models, we are working to generalize these +techniques.
+ +## Generating fully quantized models + +The previously demonstrated after-rewrite eval graph only *simulates* +quantization. To generate real fixed point computations from a trained +quantization model, convert it to a fixed point kernel. TensorFlow Lite supports +this conversion from the graph resulting from `create_eval_graph`. + +First, create a frozen graph that will be the input for the TensorFlow Lite +toolchain: + +``` +bazel build tensorflow/python/tools:freeze_graph && \ + bazel-bin/tensorflow/python/tools/freeze_graph \ + --input_graph=eval_graph_def.pb \ + --input_checkpoint=checkpoint \ + --output_graph=frozen_eval_graph.pb --output_node_names=outputs ``` -You'll see that this runs the newly-quantized graph, and outputs a very similar -answer to the original. - -You can run the same process on your own models saved out as GraphDefs, with the -input and output names adapted to those your network requires. I recommend that -you run them through the freeze_graph script first, to convert checkpoints into -constants stored in the file. - -## How Does the Quantization Process Work? - -We've implemented quantization by writing equivalent eight-bit versions of -operations that are commonly used during inference. These include convolution, -matrix multiplication, activation functions, pooling operations and -concatenation. The conversion script first replaces all the individual ops it -knows about with quantized equivalents. These are small sub-graphs that have -conversion functions before and after to move the data between float and -eight-bit. Below is an example of what they look like. First here's the original -Relu operation, with float inputs and outputs: - -![Relu Diagram](https://www.tensorflow.org/images/quantization0.png) - -Then, this is the equivalent converted subgraph, still with float inputs and -outputs, but with internal conversions so the calculations are done in eight -bit.
- -![Converted Diagram](https://www.tensorflow.org/images/quantization1.png) - -The min and max operations actually look at the values in the input float -tensor, and then feeds them into the Dequantize operation that converts the -tensor into eight-bits. There are more details on how the quantized representation -works later on. - -Once the individual operations have been converted, the next stage is to remove -unnecessary conversions to and from float. If there are consecutive sequences of -operations that all have float equivalents, then there will be a lot of adjacent -Dequantize/Quantize ops. This stage spots that pattern, recognizes that they -cancel each other out, and removes them, like this: - -![Stripping Diagram](https://www.tensorflow.org/images/quantization2.png) - -Applied on a large scale to models where all of the operations have quantized -equivalents, this gives a graph where all of the tensor calculations are done in -eight bit, without having to convert to float. - -## What Representation is Used for Quantized Tensors? - -We approach converting floating-point arrays of numbers into eight-bit -representations as a compression problem. We know that the weights and -activation tensors in trained neural network models tend to have values that are -distributed across comparatively small ranges (for example you might have -15 to -+15 for weights, -500 to 1000 for activations on an image model, though the -exact numbers will vary). We also know from experiment that neural nets tend to -be very robust in the face of noise, and so the noise-like error produced by -quantizing down to a small set of values will not hurt the precision of the -overall results very much. We also want to pick a representation that's easy to -perform calculations on, especially the large matrix multiplications that form -the bulk of the work that's needed to run a model. 
- -These led us to pick a representation that has two floats to store the overall -minimum and maximum values that are represented by the lowest and highest -quantized value. Each entry in the quantized array represents a float value in -that range, distributed linearly between the minimum and maximum. For example, -if we have minimum = -10.0, and maximum = 30.0f, and an eight-bit array, here's -what the quantized values represent: +Provide this to the TensorFlow Lite Optimizing Converter (TOCO) to get a fully +quantized TensorFlow Lite model: ``` -Quantized | Float ---------- | ----- -0 | -10.0 -255 | 30.0 -128 | 10.0 +bazel build tensorflow/contrib/lite/toco:toco && \ + ./bazel-bin/tensorflow/contrib/lite/toco/toco \ + --input_file=frozen_eval_graph.pb \ + --output_file=tflite_model.tflite \ + --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE \ + --inference_type=QUANTIZED_UINT8 \ + --input_shape="1,224,224,3" \ + --input_array=input \ + --output_array=outputs \ + --std_value=127.5 --mean_value=127.5 ``` -The advantages of this format are that it can represent arbitrary magnitudes of -ranges, they don't have to be symmetrical, it can represent signed and unsigned -values, and the linear spread makes doing multiplications straightforward. There -are alternatives like [Song Han's code books](http://arxiv.org/pdf/1510.00149.pdf) -that can use lower bit depths by non-linearly distributing the float values -across the representation, but these tend to be more expensive to calculate on. - -The advantage of having a strong and clear definition of the quantized format is -that it's always possible to convert back and forth from float for operations -that aren't quantization-ready, or to inspect the tensors for debugging -purposes.
One implementation detail in TensorFlow that we're hoping to improve -in the future is that the minimum and maximum float values need to be passed as -separate tensors to the one holding the quantized values, so graphs can get a -bit dense! - -The nice thing about the minimum and maximum ranges is that they can often be -pre-calculated. Weight parameters are constants known at load time, so their -ranges can also be stored as constants. We often know the ranges for inputs (for -examples images are usually RGB values in the range 0.0 to 255.0), and many -activation functions have known ranges too. This can avoid having to analyze the -outputs of an operation to determine the range, which we need to do for math ops -like convolution or matrix multiplication which produce 32-bit accumulated -results from 8-bit inputs. - -## What's Next? - -We've found that we can get extremely good performance on mobile and embedded -devices by using eight-bit arithmetic rather than floating-point. You can see -the framework we use to optimize matrix multiplications at -[gemmlowp](https://github.com/google/gemmlowp). We still need to apply all the -lessons we've learned to the TensorFlow ops to get maximum performance on -mobile, but we're actively working on that. Right now, this quantized -implementation is a reasonably fast and accurate reference implementation that -we're hoping will enable wider support for our eight-bit models on a wider -variety of devices. We also hope that this demonstration will encourage the -community to explore what's possible with low-precision neural networks. +See the documentation for @{tf.contrib.quantize} and +[TensorFlow Lite](/mobile/tflite/). + +## Quantized accuracy + +Fixed point [MobileNet](https://arxiv.org/abs/1704.04861) models are released with +8-bit weights and activations. Using the rewriters, these models achieve the +Top-1 accuracies listed in Table 1. For comparison, the floating point accuracies +are listed for the same models.
The code used to generate these models +[is available](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md) +along with links to all of the pretrained mobilenet_v1 models. + +
+ + + + + + + + + + + + + + + + + + + + + + + +
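| Image Size | Depth | Top-1 Accuracy: Floating point | Top-1 Accuracy: Fixed point: 8 bit weights and activations |
| --- | --- | --- | --- |
| 128 | 0.25 | 0.415 | 0.399 |
| 128 | 0.5 | 0.563 | 0.549 |
| 128 | 0.75 | 0.621 | 0.598 |
| 128 | 1 | 0.652 | 0.64 |
| 160 | 0.25 | 0.455 | 0.435 |
| 160 | 0.5 | 0.591 | 0.577 |
| 160 | 0.75 | 0.653 | 0.639 |
| 160 | 1 | 0.68 | 0.673 |
| 192 | 0.25 | 0.477 | 0.458 |
| 192 | 0.5 | 0.617 | 0.604 |
| 192 | 0.75 | 0.672 | 0.662 |
| 192 | 1 | 0.7 | 0.69 |
| 224 | 0.25 | 0.498 | 0.482 |
| 224 | 0.5 | 0.633 | 0.622 |
| 224 | 0.75 | 0.684 | 0.679 |
| 224 | 1 | 0.709 | 0.697 |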
+
+ Table 1: MobileNet Top-1 accuracy on Imagenet Validation dataset. +
+
+ +## Representation for quantized tensors + +TensorFlow approaches the conversion of floating-point arrays of numbers into +8-bit representations as a compression problem. The weights and activation +tensors in trained neural network models tend to have values that are distributed +across comparatively small ranges (for example, -15 to +15 for weights or -500 to +1000 for image model activations). Since neural nets tend to be robust +when handling noise, the error introduced by quantizing to a small set of values +maintains the precision of the overall results within an acceptable threshold. A +chosen representation must perform fast calculations, especially the large matrix +multiplications that comprise the bulk of the computations while running a model. + +A quantized tensor is represented with two floats that store the overall minimum and maximum +values corresponding to the lowest and highest quantized value. Each entry in the +quantized array represents a float value in that range, distributed linearly +between the minimum and maximum. For example, with a minimum of -10.0 and maximum +of 30.0, and an 8-bit array, the quantized values represent the following: + +
+ + + + + +
| Quantized | Float |
| --- | --- |
| 0 | -10.0 |
| 255 | 30.0 |
| 128 | 10.0 |
+
+ Table 2: Example quantized value range +
+
+ +The advantages of this representation format are: + +* It efficiently represents an arbitrary magnitude of ranges. +* The values don't have to be symmetrical. +* The format represents both signed and unsigned values. +* The linear spread makes multiplications straightforward. + +Alternative techniques use lower bit depths by non-linearly distributing the +float values across the representation, but currently are more expensive in terms +of computation time. (See Han et al., +[2016](https://arxiv.org/abs/1510.00149).) + +The advantage of having a clear definition of the quantized format is that it's +always possible to convert back and forth from fixed-point to floating-point for +operations that aren't quantization-ready, or to inspect the tensors for +debugging. -- GitLab From 6fdb9ad1baf7686a75f9e660178f7ac595e7fc2e Mon Sep 17 00:00:00 2001 From: 4d55397500 <4d55397500@users.noreply.github.com> Date: Wed, 28 Feb 2018 17:57:35 -0800 Subject: [PATCH 183/311] Fix return value in sampled_softmax_loss --- tensorflow/python/ops/nn_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 5fa5708114..254f0051a4 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -1345,4 +1345,4 @@ def sampled_softmax_loss(weights, sampled_losses = nn_ops.softmax_cross_entropy_with_logits( labels=labels, logits=logits) # sampled_losses is a [batch_size] tensor. - return sampled_losses + return sampled_losses \ No newline at end of file -- GitLab From f5e2a70e0363c1b08a342e395c4e040114b7a424 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 28 Feb 2018 17:54:56 -0800 Subject: [PATCH 184/311] Set generated ops to hidden if they are not included in TensorFlow Python API. Also, update endpoints in ApiDef files for a few ops. 
PiperOrigin-RevId: 187412039 --- tensorflow/core/api_def/python_api/api_def_Abort.pbtxt | 4 ++++ .../python_api/api_def_AccumulatorApplyGradient.pbtxt | 4 ++++ .../python_api/api_def_AccumulatorNumAccumulated.pbtxt | 4 ++++ .../python_api/api_def_AccumulatorSetGlobalStep.pbtxt | 4 ++++ .../python_api/api_def_AccumulatorTakeGradient.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_AdjustContrast.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_AdjustHue.pbtxt | 4 ++++ .../api_def/python_api/api_def_AdjustSaturation.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyAdadelta.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyAdagrad.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyAdagradDA.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyAdam.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyAddSign.pbtxt | 4 ++++ .../python_api/api_def_ApplyCenteredRMSProp.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyFtrl.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyFtrlV2.pbtxt | 4 ++++ .../python_api/api_def_ApplyGradientDescent.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyMomentum.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyPowerSign.pbtxt | 4 ++++ .../python_api/api_def_ApplyProximalAdagrad.pbtxt | 4 ++++ .../api_def_ApplyProximalGradientDescent.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyRMSProp.pbtxt | 4 ++++ .../api_def/python_api/api_def_ApproximateEqual.pbtxt | 4 ++++ .../api_def/python_api/api_def_AssignAddVariableOp.pbtxt | 4 ++++ .../api_def/python_api/api_def_AssignSubVariableOp.pbtxt | 4 ++++ .../api_def/python_api/api_def_AssignVariableOp.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_AvgPool3D.pbtxt | 6 ++++++ .../core/api_def/python_api/api_def_BatchDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_BatchMatrixBandPart.pbtxt | 4 ++++ .../api_def/python_api/api_def_BatchMatrixDiag.pbtxt | 4 ++++ .../api_def/python_api/api_def_BatchMatrixDiagPart.pbtxt | 4 ++++ 
.../api_def/python_api/api_def_BatchMatrixSetDiag.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_BiasAddGrad.pbtxt | 4 ++++ .../python_api/api_def_BytesProducedStatsDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_CacheDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_CholeskyGrad.pbtxt | 4 ++++ .../api_def/python_api/api_def_CompareAndBitpack.pbtxt | 4 ++++ .../api_def/python_api/api_def_ConcatenateDataset.pbtxt | 4 ++++ .../python_api/api_def_ConditionalAccumulator.pbtxt | 4 ++++ .../api_def/python_api/api_def_ConsumeMutexLock.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ControlTrigger.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Conv2D.pbtxt | 6 ++++++ .../python_api/api_def_Conv2DBackpropFilter.pbtxt | 6 ++++++ .../api_def/python_api/api_def_Conv2DBackpropInput.pbtxt | 6 ++++++ tensorflow/core/api_def/python_api/api_def_Conv3D.pbtxt | 6 ++++++ .../python_api/api_def_Conv3DBackpropFilter.pbtxt | 4 ++++ .../python_api/api_def_Conv3DBackpropFilterV2.pbtxt | 6 ++++++ .../api_def/python_api/api_def_Conv3DBackpropInput.pbtxt | 4 ++++ .../python_api/api_def_Conv3DBackpropInputV2.pbtxt | 4 ++++ .../python_api/api_def_CropAndResizeGradBoxes.pbtxt | 4 ++++ .../python_api/api_def_CropAndResizeGradImage.pbtxt | 4 ++++ .../api_def/python_api/api_def_DataFormatDimMap.pbtxt | 4 ++++ .../python_api/api_def_DataFormatVecPermute.pbtxt | 4 ++++ .../python_api/api_def_DatasetToSingleElement.pbtxt | 4 ++++ .../api_def/python_api/api_def_DecodeCompressed.pbtxt | 4 ++++ .../python_api/api_def_DenseToDenseSetOperation.pbtxt | 4 ++++ .../python_api/api_def_DenseToSparseBatchDataset.pbtxt | 4 ++++ .../python_api/api_def_DenseToSparseSetOperation.pbtxt | 4 ++++ .../api_def/python_api/api_def_DeserializeIterator.pbtxt | 4 ++++ .../api_def/python_api/api_def_DestroyResourceOp.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_Dilation2D.pbtxt | 6 ++++++ .../python_api/api_def_Dilation2DBackpropFilter.pbtxt | 4 ++++ 
.../python_api/api_def_Dilation2DBackpropInput.pbtxt | 4 ++++ .../python_api/api_def_EnqueueInQueueDataset.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_FFT2D.pbtxt | 9 +++++++++ tensorflow/core/api_def/python_api/api_def_FFT3D.pbtxt | 9 +++++++++ .../core/api_def/python_api/api_def_FilterDataset.pbtxt | 4 ++++ .../python_api/api_def_FixedLengthRecordDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_FlatMapDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_FusedBatchNormGrad.pbtxt | 4 ++++ .../python_api/api_def_FusedBatchNormGradV2.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_FusedPadConv2D.pbtxt | 4 ++++ .../python_api/api_def_FusedResizeAndPadConv2D.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_GatherV2.pbtxt | 4 ++++ .../api_def/python_api/api_def_GeneratorDataset.pbtxt | 4 ++++ .../python_api/api_def_GroupByWindowDataset.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_IFFT2D.pbtxt | 9 +++++++++ tensorflow/core/api_def/python_api/api_def_IFFT3D.pbtxt | 9 +++++++++ tensorflow/core/api_def/python_api/api_def_IRFFT.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_IRFFT2D.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_IRFFT3D.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ImmutableConst.pbtxt | 4 ++++ .../api_def/python_api/api_def_InterleaveDataset.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Inv.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_Iterator.pbtxt | 4 ++++ .../python_api/api_def_IteratorFromStringHandle.pbtxt | 4 ++++ .../api_def/python_api/api_def_IteratorGetNext.pbtxt | 4 ++++ .../api_def/python_api/api_def_IteratorGetNextSync.pbtxt | 4 ++++ .../python_api/api_def_IteratorSetStatsAggregator.pbtxt | 4 ++++ .../python_api/api_def_IteratorToStringHandle.pbtxt | 4 ++++ .../api_def/python_api/api_def_LatencyStatsDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_LoopCond.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MakeIterator.pbtxt | 4 ++++ 
.../api_def/python_api/api_def_MapAndBatchDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MapClear.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MapDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_MapIncompleteSize.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_MapPeek.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_MapSize.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MapStage.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MapUnstage.pbtxt | 4 ++++ .../api_def/python_api/api_def_MapUnstageNoKey.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MaxPool3D.pbtxt | 6 ++++++ .../api_def/python_api/api_def_MaxPoolGradGradV2.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MaxPoolGradV2.pbtxt | 4 ++++ .../api_def/python_api/api_def_MergeV2Checkpoints.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MutexLock.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_MutexV2.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_NextIteration.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_NthElement.pbtxt | 4 ++++ .../api_def/python_api/api_def_OneShotIterator.pbtxt | 4 ++++ .../api_def/python_api/api_def_OrderedMapClear.pbtxt | 4 ++++ .../python_api/api_def_OrderedMapIncompleteSize.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_OrderedMapPeek.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_OrderedMapSize.pbtxt | 4 ++++ .../api_def/python_api/api_def_OrderedMapStage.pbtxt | 4 ++++ .../api_def/python_api/api_def_OrderedMapUnstage.pbtxt | 4 ++++ .../python_api/api_def_OrderedMapUnstageNoKey.pbtxt | 4 ++++ .../api_def/python_api/api_def_PaddedBatchDataset.pbtxt | 4 ++++ .../python_api/api_def_ParallelDynamicStitch.pbtxt | 4 ++++ .../python_api/api_def_ParallelInterleaveDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_ParallelMapDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_PlaceholderV2.pbtxt | 4 ++++ .../api_def/python_api/api_def_PopulationCount.pbtxt | 4 ++++ 
.../api_def/python_api/api_def_PrefetchDataset.pbtxt | 4 ++++ .../api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_PreventGradient.pbtxt | 4 ++++ .../python_api/api_def_QuantizeAndDequantize.pbtxt | 4 ++++ .../python_api/api_def_QuantizeAndDequantizeV2.pbtxt | 4 ++++ .../python_api/api_def_QuantizeAndDequantizeV3.pbtxt | 4 ++++ .../python_api/api_def_QuantizeDownAndShrinkRange.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_QuantizedAdd.pbtxt | 4 ++++ ...i_def_QuantizedBatchNormWithGlobalNormalization.pbtxt | 4 ++++ .../api_def/python_api/api_def_QuantizedBiasAdd.pbtxt | 4 ++++ .../api_def/python_api/api_def_QuantizedConv2D.pbtxt | 6 ++++++ .../python_api/api_def_QuantizedInstanceNorm.pbtxt | 4 ++++ .../api_def/python_api/api_def_QuantizedMatMul.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_QuantizedMul.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_QuantizedRelu.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_QuantizedRelu6.pbtxt | 4 ++++ .../api_def/python_api/api_def_QuantizedReshape.pbtxt | 4 ++++ .../python_api/api_def_QuantizedResizeBilinear.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_QueueIsClosed.pbtxt | 4 ++++ .../api_def/python_api/api_def_QueueIsClosedV2.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_RFFT.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_RFFT2D.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_RFFT3D.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RandomDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_RandomPoissonV2.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RangeDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ReadVariableOp.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RecordInput.pbtxt | 4 ++++ .../api_def/python_api/api_def_RefNextIteration.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RefSelect.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RefSwitch.pbtxt | 4 ++++ 
.../core/api_def/python_api/api_def_RemoteCall.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RepeatDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_RequantizationRange.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_Requantize.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyAdadelta.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyAdagrad.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyAdagradDA.pbtxt | 4 ++++ .../api_def/python_api/api_def_ResourceApplyAdam.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyAddSign.pbtxt | 4 ++++ .../api_def_ResourceApplyCenteredRMSProp.pbtxt | 4 ++++ .../api_def/python_api/api_def_ResourceApplyFtrl.pbtxt | 4 ++++ .../api_def/python_api/api_def_ResourceApplyFtrlV2.pbtxt | 4 ++++ .../api_def_ResourceApplyGradientDescent.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyMomentum.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyPowerSign.pbtxt | 4 ++++ .../api_def_ResourceApplyProximalAdagrad.pbtxt | 4 ++++ .../api_def_ResourceApplyProximalGradientDescent.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyRMSProp.pbtxt | 4 ++++ .../api_def/python_api/api_def_ResourceCountUpTo.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ResourceGather.pbtxt | 4 ++++ .../api_def/python_api/api_def_ResourceScatterAdd.pbtxt | 4 ++++ .../python_api/api_def_ResourceScatterNdUpdate.pbtxt | 4 ++++ .../python_api/api_def_ResourceScatterUpdate.pbtxt | 4 ++++ .../python_api/api_def_ResourceSparseApplyAdadelta.pbtxt | 4 ++++ .../python_api/api_def_ResourceSparseApplyAdagrad.pbtxt | 4 ++++ .../api_def_ResourceSparseApplyAdagradDA.pbtxt | 4 ++++ .../api_def_ResourceSparseApplyCenteredRMSProp.pbtxt | 4 ++++ .../python_api/api_def_ResourceSparseApplyFtrl.pbtxt | 4 ++++ .../python_api/api_def_ResourceSparseApplyFtrlV2.pbtxt | 4 ++++ .../python_api/api_def_ResourceSparseApplyMomentum.pbtxt | 4 ++++ .../api_def_ResourceSparseApplyProximalAdagrad.pbtxt | 4 ++++ ..._def_ResourceSparseApplyProximalGradientDescent.pbtxt | 4 ++++ 
.../python_api/api_def_ResourceSparseApplyRMSProp.pbtxt | 4 ++++ .../python_api/api_def_ResourceStridedSliceAssign.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RestoreV2.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Roll.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_SaveV2.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ScanDataset.pbtxt | 4 ++++ .../python_api/api_def_ScatterNdNonAliasingAdd.pbtxt | 4 ++++ .../api_def/python_api/api_def_SerializeIterator.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_SetSize.pbtxt | 4 ++++ .../python_api/api_def_ShuffleAndRepeatDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ShuffleDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_SkipDataset.pbtxt | 4 ++++ .../api_def_SparseAccumulatorApplyGradient.pbtxt | 4 ++++ .../api_def_SparseAccumulatorTakeGradient.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyAdadelta.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyAdagrad.pbtxt | 4 ++++ .../python_api/api_def_SparseApplyAdagradDA.pbtxt | 4 ++++ .../python_api/api_def_SparseApplyCenteredRMSProp.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyFtrl.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyFtrlV2.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyMomentum.pbtxt | 4 ++++ .../python_api/api_def_SparseApplyProximalAdagrad.pbtxt | 4 ++++ .../api_def_SparseApplyProximalGradientDescent.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyRMSProp.pbtxt | 4 ++++ .../api_def_SparseConditionalAccumulator.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseDenseCwiseAdd.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseDenseCwiseDiv.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseDenseCwiseMul.pbtxt | 4 ++++ .../python_api/api_def_SparseSegmentMeanGrad.pbtxt | 4 ++++ .../api_def_SparseSegmentMeanWithNumSegments.pbtxt | 4 ++++ .../python_api/api_def_SparseSegmentSqrtNGrad.pbtxt | 4 ++++ 
.../api_def_SparseSegmentSqrtNWithNumSegments.pbtxt | 4 ++++ .../api_def_SparseSegmentSumWithNumSegments.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseSparseMaximum.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseSparseMinimum.pbtxt | 4 ++++ .../python_api/api_def_SparseTensorSliceDataset.pbtxt | 4 ++++ .../python_api/api_def_SparseToSparseSetOperation.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_SqlDataset.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Stage.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_StageClear.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_StagePeek.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_StageSize.pbtxt | 4 ++++ .../python_api/api_def_StatsAggregatorHandle.pbtxt | 4 ++++ .../python_api/api_def_StatsAggregatorSummary.pbtxt | 4 ++++ .../api_def/python_api/api_def_StridedSliceAssign.pbtxt | 4 ++++ .../api_def/python_api/api_def_StridedSliceGrad.pbtxt | 4 ++++ .../api_def/python_api/api_def_TFRecordDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_TakeDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_TensorDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_TensorSliceDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_TextLineDataset.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Unstage.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_VarHandleOp.pbtxt | 4 ++++ .../api_def/python_api/api_def_VarIsInitializedOp.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_VariableShape.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ZipDataset.pbtxt | 4 ++++ 243 files changed, 1010 insertions(+) create mode 100644 tensorflow/core/api_def/python_api/api_def_Abort.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AccumulatorApplyGradient.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AccumulatorNumAccumulated.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AccumulatorSetGlobalStep.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_AccumulatorTakeGradient.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AdjustContrast.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AdjustHue.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AdjustSaturation.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAdadelta.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAdagradDA.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAdam.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAddSign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyCenteredRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyFtrl.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyFtrlV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyGradientDescent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyMomentum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyPowerSign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyProximalAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyProximalGradientDescent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApproximateEqual.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AssignAddVariableOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AssignSubVariableOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AssignVariableOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BatchDataset.pbtxt 
create mode 100644 tensorflow/core/api_def/python_api/api_def_BatchMatrixBandPart.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BatchMatrixDiag.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BatchMatrixDiagPart.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BatchMatrixSetDiag.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BiasAddGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BytesProducedStatsDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CacheDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CholeskyGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CompareAndBitpack.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ConcatenateDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ConditionalAccumulator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ConsumeMutexLock.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ControlTrigger.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv2DBackpropFilter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv2DBackpropInput.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInput.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInputV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CropAndResizeGradBoxes.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CropAndResizeGradImage.pbtxt create mode 
100644 tensorflow/core/api_def/python_api/api_def_DataFormatDimMap.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DataFormatVecPermute.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DatasetToSingleElement.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DecodeCompressed.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DenseToDenseSetOperation.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DenseToSparseBatchDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DenseToSparseSetOperation.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DeserializeIterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DestroyResourceOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Dilation2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropFilter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropInput.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_EnqueueInQueueDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FFT2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FFT3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FilterDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FixedLengthRecordDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FlatMapDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FusedBatchNormGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FusedBatchNormGradV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FusedPadConv2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FusedResizeAndPadConv2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_GatherV2.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_GeneratorDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_GroupByWindowDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IFFT2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IFFT3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IRFFT.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IRFFT2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IRFFT3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ImmutableConst.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_InterleaveDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Inv.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Iterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IteratorFromStringHandle.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IteratorGetNext.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IteratorGetNextSync.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IteratorSetStatsAggregator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IteratorToStringHandle.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_LatencyStatsDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_LoopCond.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MakeIterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapAndBatchDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapClear.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapIncompleteSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapPeek.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_MapSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapStage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapUnstage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapUnstageNoKey.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MaxPool3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MaxPoolGradGradV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MaxPoolGradV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MergeV2Checkpoints.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MutexLock.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MutexV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_NextIteration.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_NthElement.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OneShotIterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapClear.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapIncompleteSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapPeek.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapStage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapUnstage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapUnstageNoKey.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PaddedBatchDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ParallelDynamicStitch.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ParallelInterleaveDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ParallelMapDataset.pbtxt create 
mode 100644 tensorflow/core/api_def/python_api/api_def_PlaceholderV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PopulationCount.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PrefetchDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PreventGradient.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV3.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeDownAndShrinkRange.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedAdd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedBiasAdd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedConv2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedInstanceNorm.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedMatMul.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedMul.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedRelu.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedRelu6.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedReshape.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedResizeBilinear.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QueueIsClosed.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QueueIsClosedV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RFFT.pbtxt 
create mode 100644 tensorflow/core/api_def/python_api/api_def_RFFT2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RFFT3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RandomDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RandomPoissonV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RangeDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ReadVariableOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RecordInput.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RefNextIteration.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RefSelect.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RefSwitch.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RemoteCall.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RepeatDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RequantizationRange.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Requantize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAdadelta.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagradDA.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAdam.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAddSign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyCenteredRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrl.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrlV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyGradientDescent.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_ResourceApplyMomentum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyPowerSign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalGradientDescent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceCountUpTo.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceGather.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterAdd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterNdUpdate.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterUpdate.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdadelta.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagradDA.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrl.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrlV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyMomentum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceStridedSliceAssign.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_RestoreV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Roll.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SaveV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ScanDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ScatterNdNonAliasingAdd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SerializeIterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SetSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ShuffleAndRepeatDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ShuffleDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SkipDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseAccumulatorApplyGradient.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseAccumulatorTakeGradient.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyAdadelta.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyAdagradDA.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyCenteredRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyFtrl.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyFtrlV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyMomentum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyProximalAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyProximalGradientDescent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseConditionalAccumulator.pbtxt create 
mode 100644 tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseAdd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseDiv.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseMul.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentSumWithNumSegments.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSparseMaximum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSparseMinimum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseTensorSliceDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseToSparseSetOperation.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SqlDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Stage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StageClear.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StagePeek.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StageSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StatsAggregatorHandle.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StatsAggregatorSummary.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StridedSliceAssign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StridedSliceGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TFRecordDataset.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_TakeDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorSliceDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TextLineDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Unstage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_VarHandleOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_VarIsInitializedOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_VariableShape.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ZipDataset.pbtxt diff --git a/tensorflow/core/api_def/python_api/api_def_Abort.pbtxt b/tensorflow/core/api_def/python_api/api_def_Abort.pbtxt new file mode 100644 index 0000000000..3f95aaf12c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Abort.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Abort" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AccumulatorApplyGradient.pbtxt b/tensorflow/core/api_def/python_api/api_def_AccumulatorApplyGradient.pbtxt new file mode 100644 index 0000000000..1e76d6dadc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AccumulatorApplyGradient.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AccumulatorApplyGradient" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AccumulatorNumAccumulated.pbtxt b/tensorflow/core/api_def/python_api/api_def_AccumulatorNumAccumulated.pbtxt new file mode 100644 index 0000000000..fbe971ab2e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AccumulatorNumAccumulated.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AccumulatorNumAccumulated" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AccumulatorSetGlobalStep.pbtxt b/tensorflow/core/api_def/python_api/api_def_AccumulatorSetGlobalStep.pbtxt 
new file mode 100644 index 0000000000..0047b25af6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AccumulatorSetGlobalStep.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AccumulatorSetGlobalStep" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AccumulatorTakeGradient.pbtxt b/tensorflow/core/api_def/python_api/api_def_AccumulatorTakeGradient.pbtxt new file mode 100644 index 0000000000..860fbe1245 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AccumulatorTakeGradient.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AccumulatorTakeGradient" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AdjustContrast.pbtxt b/tensorflow/core/api_def/python_api/api_def_AdjustContrast.pbtxt new file mode 100644 index 0000000000..0311ad92b7 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AdjustContrast.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AdjustContrast" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AdjustHue.pbtxt b/tensorflow/core/api_def/python_api/api_def_AdjustHue.pbtxt new file mode 100644 index 0000000000..b441167711 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AdjustHue.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AdjustHue" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AdjustSaturation.pbtxt b/tensorflow/core/api_def/python_api/api_def_AdjustSaturation.pbtxt new file mode 100644 index 0000000000..893219e17a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AdjustSaturation.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AdjustSaturation" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyAdadelta.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyAdadelta.pbtxt new file mode 100644 index 0000000000..d8776b19f1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyAdadelta.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: 
"ApplyAdadelta" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyAdagrad.pbtxt new file mode 100644 index 0000000000..7e659c1bb3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyAdagradDA.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyAdagradDA.pbtxt new file mode 100644 index 0000000000..d647c5eb0a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyAdagradDA.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyAdagradDA" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyAdam.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyAdam.pbtxt new file mode 100644 index 0000000000..66d9095c8f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyAdam.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyAdam" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyAddSign.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyAddSign.pbtxt new file mode 100644 index 0000000000..b7fe1aa654 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyAddSign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyAddSign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyCenteredRMSProp.pbtxt new file mode 100644 index 0000000000..56003c5e6f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyCenteredRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyCenteredRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyFtrl.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyFtrl.pbtxt new file mode 100644 index 0000000000..680b3ef480 --- 
/dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyFtrl.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyFtrl" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyFtrlV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyFtrlV2.pbtxt new file mode 100644 index 0000000000..5ab3bb6efd --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyFtrlV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyFtrlV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyGradientDescent.pbtxt new file mode 100644 index 0000000000..467bf7db55 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyMomentum.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyMomentum.pbtxt new file mode 100644 index 0000000000..7c3f0fef95 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyMomentum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyMomentum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyPowerSign.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyPowerSign.pbtxt new file mode 100644 index 0000000000..f376b1dc6e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyPowerSign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyPowerSign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyProximalAdagrad.pbtxt new file mode 100644 index 0000000000..0c6e2a4bb1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyProximalAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyProximalAdagrad" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyProximalGradientDescent.pbtxt new file mode 100644 index 0000000000..90c1655fe9 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyProximalGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyProximalGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyRMSProp.pbtxt new file mode 100644 index 0000000000..18cce1915a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApproximateEqual.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApproximateEqual.pbtxt new file mode 100644 index 0000000000..707f6716f9 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApproximateEqual.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApproximateEqual" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AssignAddVariableOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_AssignAddVariableOp.pbtxt new file mode 100644 index 0000000000..e30ec092e6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AssignAddVariableOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AssignAddVariableOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AssignSubVariableOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_AssignSubVariableOp.pbtxt new file mode 100644 index 0000000000..81290a56ec --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AssignSubVariableOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AssignSubVariableOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AssignVariableOp.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_AssignVariableOp.pbtxt new file mode 100644 index 0000000000..3ffa4a11c4 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AssignVariableOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AssignVariableOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt new file mode 100644 index 0000000000..cc16523a15 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "AvgPool3D" + endpoint { + name: "nn.avg_pool3d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_BatchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchDataset.pbtxt new file mode 100644 index 0000000000..4289c1daf9 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BatchDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BatchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BatchMatrixBandPart.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchMatrixBandPart.pbtxt new file mode 100644 index 0000000000..0a699e2050 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BatchMatrixBandPart.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BatchMatrixBandPart" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiag.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiag.pbtxt new file mode 100644 index 0000000000..40be51eccc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiag.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BatchMatrixDiag" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiagPart.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiagPart.pbtxt new file mode 100644 index 0000000000..1ef78fa5ec --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiagPart.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BatchMatrixDiagPart" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BatchMatrixSetDiag.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchMatrixSetDiag.pbtxt new file mode 100644 index 0000000000..644c1270a2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BatchMatrixSetDiag.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BatchMatrixSetDiag" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BiasAddGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_BiasAddGrad.pbtxt new file mode 100644 index 0000000000..9226c6791c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BiasAddGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BiasAddGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BytesProducedStatsDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_BytesProducedStatsDataset.pbtxt new file mode 100644 index 0000000000..fcf541f903 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BytesProducedStatsDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BytesProducedStatsDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CacheDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_CacheDataset.pbtxt new file mode 100644 index 0000000000..2bbb4ff9e3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CacheDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CacheDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CholeskyGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_CholeskyGrad.pbtxt new file mode 100644 index 0000000000..3538afb2a7 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CholeskyGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CholeskyGrad" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_CompareAndBitpack.pbtxt b/tensorflow/core/api_def/python_api/api_def_CompareAndBitpack.pbtxt new file mode 100644 index 0000000000..493a7e4866 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CompareAndBitpack.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CompareAndBitpack" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ConcatenateDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ConcatenateDataset.pbtxt new file mode 100644 index 0000000000..c005a4da0f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ConcatenateDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ConcatenateDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ConditionalAccumulator.pbtxt b/tensorflow/core/api_def/python_api/api_def_ConditionalAccumulator.pbtxt new file mode 100644 index 0000000000..a4663e8eb3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ConditionalAccumulator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ConditionalAccumulator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ConsumeMutexLock.pbtxt b/tensorflow/core/api_def/python_api/api_def_ConsumeMutexLock.pbtxt new file mode 100644 index 0000000000..9559947490 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ConsumeMutexLock.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ConsumeMutexLock" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ControlTrigger.pbtxt b/tensorflow/core/api_def/python_api/api_def_ControlTrigger.pbtxt new file mode 100644 index 0000000000..33941493af --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ControlTrigger.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ControlTrigger" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv2D.pbtxt new file mode 100644 index 
0000000000..2ae75d6da2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv2D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Conv2D" + endpoint { + name: "nn.conv2d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropFilter.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropFilter.pbtxt new file mode 100644 index 0000000000..6f21d8c880 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropFilter.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Conv2DBackpropFilter" + endpoint { + name: "nn.conv2d_backprop_filter" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropInput.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropInput.pbtxt new file mode 100644 index 0000000000..ea976799cb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropInput.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Conv2DBackpropInput" + endpoint { + name: "nn.conv2d_backprop_input" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3D.pbtxt new file mode 100644 index 0000000000..ba8d178263 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv3D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Conv3D" + endpoint { + name: "nn.conv3d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilter.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilter.pbtxt new file mode 100644 index 0000000000..634545f427 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Conv3DBackpropFilter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt new file mode 100644 index 0000000000..1da8ee3a25 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Conv3DBackpropFilterV2" + endpoint { + name: "nn.conv3d_backprop_filter_v2" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInput.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInput.pbtxt new file mode 100644 index 0000000000..e2b0a0d19f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInput.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Conv3DBackpropInput" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInputV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInputV2.pbtxt new file mode 100644 index 0000000000..4e5c4f74fe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInputV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Conv3DBackpropInputV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradBoxes.pbtxt b/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradBoxes.pbtxt new file mode 100644 index 0000000000..ac44494193 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradBoxes.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CropAndResizeGradBoxes" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradImage.pbtxt b/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradImage.pbtxt new file mode 100644 index 0000000000..eecd0536f2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradImage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CropAndResizeGradImage" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DataFormatDimMap.pbtxt b/tensorflow/core/api_def/python_api/api_def_DataFormatDimMap.pbtxt new file mode 100644 index 0000000000..82a39cfc59 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_DataFormatDimMap.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DataFormatDimMap" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DataFormatVecPermute.pbtxt b/tensorflow/core/api_def/python_api/api_def_DataFormatVecPermute.pbtxt new file mode 100644 index 0000000000..9ec292df8f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DataFormatVecPermute.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DataFormatVecPermute" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DatasetToSingleElement.pbtxt b/tensorflow/core/api_def/python_api/api_def_DatasetToSingleElement.pbtxt new file mode 100644 index 0000000000..e3d34cc15b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DatasetToSingleElement.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DatasetToSingleElement" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DecodeCompressed.pbtxt b/tensorflow/core/api_def/python_api/api_def_DecodeCompressed.pbtxt new file mode 100644 index 0000000000..f0b7539918 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DecodeCompressed.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DecodeCompressed" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DenseToDenseSetOperation.pbtxt b/tensorflow/core/api_def/python_api/api_def_DenseToDenseSetOperation.pbtxt new file mode 100644 index 0000000000..1c47ec09c5 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DenseToDenseSetOperation.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DenseToDenseSetOperation" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DenseToSparseBatchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_DenseToSparseBatchDataset.pbtxt new file mode 100644 index 0000000000..0a8e068afb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DenseToSparseBatchDataset.pbtxt @@ -0,0 
+1,4 @@ +op { + graph_op_name: "DenseToSparseBatchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DenseToSparseSetOperation.pbtxt b/tensorflow/core/api_def/python_api/api_def_DenseToSparseSetOperation.pbtxt new file mode 100644 index 0000000000..a30757df4d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DenseToSparseSetOperation.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DenseToSparseSetOperation" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DeserializeIterator.pbtxt b/tensorflow/core/api_def/python_api/api_def_DeserializeIterator.pbtxt new file mode 100644 index 0000000000..170d37be4e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DeserializeIterator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DeserializeIterator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DestroyResourceOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_DestroyResourceOp.pbtxt new file mode 100644 index 0000000000..b9dde0080a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DestroyResourceOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DestroyResourceOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Dilation2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_Dilation2D.pbtxt new file mode 100644 index 0000000000..6d73ecf1bb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Dilation2D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Dilation2D" + endpoint { + name: "nn.dilation2d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropFilter.pbtxt b/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropFilter.pbtxt new file mode 100644 index 0000000000..feb9f083db --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropFilter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Dilation2DBackpropFilter" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropInput.pbtxt b/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropInput.pbtxt new file mode 100644 index 0000000000..9a6b09f5cc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropInput.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Dilation2DBackpropInput" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_EnqueueInQueueDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_EnqueueInQueueDataset.pbtxt new file mode 100644 index 0000000000..051cf14c0e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_EnqueueInQueueDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "EnqueueInQueueDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FFT2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_FFT2D.pbtxt new file mode 100644 index 0000000000..9ed1341dfe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FFT2D.pbtxt @@ -0,0 +1,9 @@ +op { + graph_op_name: "FFT2D" + endpoint { + name: "spectral.fft2d" + } + endpoint { + name: "fft2d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_FFT3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_FFT3D.pbtxt new file mode 100644 index 0000000000..5a4e1d6adf --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FFT3D.pbtxt @@ -0,0 +1,9 @@ +op { + graph_op_name: "FFT3D" + endpoint { + name: "spectral.fft3d" + } + endpoint { + name: "fft3d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_FilterDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_FilterDataset.pbtxt new file mode 100644 index 0000000000..6f91b84218 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FilterDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FilterDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FixedLengthRecordDataset.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_FixedLengthRecordDataset.pbtxt new file mode 100644 index 0000000000..d0703471d3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FixedLengthRecordDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FixedLengthRecordDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FlatMapDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_FlatMapDataset.pbtxt new file mode 100644 index 0000000000..9de61ac263 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FlatMapDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FlatMapDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGrad.pbtxt new file mode 100644 index 0000000000..56409f32d8 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FusedBatchNormGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGradV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGradV2.pbtxt new file mode 100644 index 0000000000..f5a4200b76 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGradV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FusedBatchNormGradV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FusedPadConv2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_FusedPadConv2D.pbtxt new file mode 100644 index 0000000000..03b5fdd5a1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FusedPadConv2D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FusedPadConv2D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FusedResizeAndPadConv2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_FusedResizeAndPadConv2D.pbtxt new file mode 100644 index 0000000000..52165d9b4d --- 
/dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FusedResizeAndPadConv2D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FusedResizeAndPadConv2D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_GatherV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_GatherV2.pbtxt new file mode 100644 index 0000000000..029bc59b51 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_GatherV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "GatherV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_GeneratorDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_GeneratorDataset.pbtxt new file mode 100644 index 0000000000..9dcfa0f7d2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_GeneratorDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "GeneratorDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_GroupByWindowDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_GroupByWindowDataset.pbtxt new file mode 100644 index 0000000000..8d40208e61 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_GroupByWindowDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "GroupByWindowDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IFFT2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_IFFT2D.pbtxt new file mode 100644 index 0000000000..d6b36a314b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IFFT2D.pbtxt @@ -0,0 +1,9 @@ +op { + graph_op_name: "IFFT2D" + endpoint { + name: "spectral.ifft2d" + } + endpoint { + name: "ifft2d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_IFFT3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_IFFT3D.pbtxt new file mode 100644 index 0000000000..6def5b36da --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IFFT3D.pbtxt @@ -0,0 +1,9 @@ +op { + graph_op_name: "IFFT3D" + endpoint { + name: "spectral.ifft3d" + } + endpoint { + 
name: "ifft3d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_IRFFT.pbtxt b/tensorflow/core/api_def/python_api/api_def_IRFFT.pbtxt new file mode 100644 index 0000000000..8fa74a4317 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IRFFT.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IRFFT" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IRFFT2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_IRFFT2D.pbtxt new file mode 100644 index 0000000000..2021cad639 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IRFFT2D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IRFFT2D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IRFFT3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_IRFFT3D.pbtxt new file mode 100644 index 0000000000..5d1eab6003 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IRFFT3D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IRFFT3D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ImmutableConst.pbtxt b/tensorflow/core/api_def/python_api/api_def_ImmutableConst.pbtxt new file mode 100644 index 0000000000..997013914b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ImmutableConst.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ImmutableConst" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_InterleaveDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_InterleaveDataset.pbtxt new file mode 100644 index 0000000000..ef1b06b19c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_InterleaveDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "InterleaveDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Inv.pbtxt b/tensorflow/core/api_def/python_api/api_def_Inv.pbtxt new file mode 100644 index 0000000000..ed58a276f6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Inv.pbtxt @@ -0,0 +1,4 @@ +op { + 
graph_op_name: "Inv" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Iterator.pbtxt b/tensorflow/core/api_def/python_api/api_def_Iterator.pbtxt new file mode 100644 index 0000000000..a021db1534 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Iterator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Iterator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IteratorFromStringHandle.pbtxt b/tensorflow/core/api_def/python_api/api_def_IteratorFromStringHandle.pbtxt new file mode 100644 index 0000000000..f9efe2d144 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IteratorFromStringHandle.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IteratorFromStringHandle" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IteratorGetNext.pbtxt b/tensorflow/core/api_def/python_api/api_def_IteratorGetNext.pbtxt new file mode 100644 index 0000000000..f7066484ce --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IteratorGetNext.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IteratorGetNext" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IteratorGetNextSync.pbtxt b/tensorflow/core/api_def/python_api/api_def_IteratorGetNextSync.pbtxt new file mode 100644 index 0000000000..d94edbc71d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IteratorGetNextSync.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IteratorGetNextSync" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IteratorSetStatsAggregator.pbtxt b/tensorflow/core/api_def/python_api/api_def_IteratorSetStatsAggregator.pbtxt new file mode 100644 index 0000000000..db51ae3873 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IteratorSetStatsAggregator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IteratorSetStatsAggregator" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_IteratorToStringHandle.pbtxt b/tensorflow/core/api_def/python_api/api_def_IteratorToStringHandle.pbtxt new file mode 100644 index 0000000000..8a4251f76b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IteratorToStringHandle.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IteratorToStringHandle" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_LatencyStatsDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_LatencyStatsDataset.pbtxt new file mode 100644 index 0000000000..94bf6106ad --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_LatencyStatsDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "LatencyStatsDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_LoopCond.pbtxt b/tensorflow/core/api_def/python_api/api_def_LoopCond.pbtxt new file mode 100644 index 0000000000..4cfa295b2a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_LoopCond.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "LoopCond" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MakeIterator.pbtxt b/tensorflow/core/api_def/python_api/api_def_MakeIterator.pbtxt new file mode 100644 index 0000000000..acc3342c9b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MakeIterator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MakeIterator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapAndBatchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapAndBatchDataset.pbtxt new file mode 100644 index 0000000000..cffd2910fb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapAndBatchDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapAndBatchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapClear.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapClear.pbtxt new file mode 100644 index 0000000000..67c1c3e2dd --- 
/dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapClear.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapClear" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapDataset.pbtxt new file mode 100644 index 0000000000..0b1d2f2c73 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapIncompleteSize.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapIncompleteSize.pbtxt new file mode 100644 index 0000000000..db7921e13b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapIncompleteSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapIncompleteSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapPeek.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapPeek.pbtxt new file mode 100644 index 0000000000..85fab17229 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapPeek.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapPeek" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapSize.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapSize.pbtxt new file mode 100644 index 0000000000..8b6ed1a0cf --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapStage.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapStage.pbtxt new file mode 100644 index 0000000000..3ae70d5d57 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapStage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapStage" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapUnstage.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapUnstage.pbtxt new file mode 
100644 index 0000000000..e5f92e37db --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapUnstage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapUnstage" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapUnstageNoKey.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapUnstageNoKey.pbtxt new file mode 100644 index 0000000000..2c2a25db21 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapUnstageNoKey.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapUnstageNoKey" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MaxPool3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_MaxPool3D.pbtxt new file mode 100644 index 0000000000..e8576c9ff2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MaxPool3D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "MaxPool3D" + endpoint { + name: "nn.max_pool3d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_MaxPoolGradGradV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_MaxPoolGradGradV2.pbtxt new file mode 100644 index 0000000000..534cc90e41 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MaxPoolGradGradV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MaxPoolGradGradV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MaxPoolGradV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_MaxPoolGradV2.pbtxt new file mode 100644 index 0000000000..e79f839686 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MaxPoolGradV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MaxPoolGradV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MergeV2Checkpoints.pbtxt b/tensorflow/core/api_def/python_api/api_def_MergeV2Checkpoints.pbtxt new file mode 100644 index 0000000000..ca9f74e0c1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MergeV2Checkpoints.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MergeV2Checkpoints" + visibility: 
HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MutexLock.pbtxt b/tensorflow/core/api_def/python_api/api_def_MutexLock.pbtxt new file mode 100644 index 0000000000..74e6e10357 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MutexLock.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MutexLock" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MutexV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_MutexV2.pbtxt new file mode 100644 index 0000000000..013f42d855 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MutexV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MutexV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_NextIteration.pbtxt b/tensorflow/core/api_def/python_api/api_def_NextIteration.pbtxt new file mode 100644 index 0000000000..28ac301e41 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_NextIteration.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "NextIteration" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_NthElement.pbtxt b/tensorflow/core/api_def/python_api/api_def_NthElement.pbtxt new file mode 100644 index 0000000000..ec83858510 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_NthElement.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "NthElement" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OneShotIterator.pbtxt b/tensorflow/core/api_def/python_api/api_def_OneShotIterator.pbtxt new file mode 100644 index 0000000000..ee9d777b4e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OneShotIterator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OneShotIterator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapClear.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapClear.pbtxt new file mode 100644 index 0000000000..b8276b964a --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_OrderedMapClear.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapClear" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapIncompleteSize.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapIncompleteSize.pbtxt new file mode 100644 index 0000000000..1ba6c5b2fc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapIncompleteSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapIncompleteSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapPeek.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapPeek.pbtxt new file mode 100644 index 0000000000..8f0c7afd46 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapPeek.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapPeek" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapSize.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapSize.pbtxt new file mode 100644 index 0000000000..2e155726da --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapStage.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapStage.pbtxt new file mode 100644 index 0000000000..6222c1fc4c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapStage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapStage" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstage.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstage.pbtxt new file mode 100644 index 0000000000..5cca8d9f93 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapUnstage" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstageNoKey.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstageNoKey.pbtxt new file mode 100644 index 0000000000..d67b95b65b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstageNoKey.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapUnstageNoKey" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PaddedBatchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_PaddedBatchDataset.pbtxt new file mode 100644 index 0000000000..c6223b3132 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PaddedBatchDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PaddedBatchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ParallelDynamicStitch.pbtxt b/tensorflow/core/api_def/python_api/api_def_ParallelDynamicStitch.pbtxt new file mode 100644 index 0000000000..a36ad27364 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ParallelDynamicStitch.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ParallelDynamicStitch" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ParallelInterleaveDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ParallelInterleaveDataset.pbtxt new file mode 100644 index 0000000000..93cd5719fe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ParallelInterleaveDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ParallelInterleaveDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ParallelMapDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ParallelMapDataset.pbtxt new file mode 100644 index 0000000000..09d200dd24 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ParallelMapDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ParallelMapDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PlaceholderV2.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_PlaceholderV2.pbtxt new file mode 100644 index 0000000000..a30360d2de --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PlaceholderV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PlaceholderV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PopulationCount.pbtxt b/tensorflow/core/api_def/python_api/api_def_PopulationCount.pbtxt new file mode 100644 index 0000000000..d35550236a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PopulationCount.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PopulationCount" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PrefetchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_PrefetchDataset.pbtxt new file mode 100644 index 0000000000..ec4e214eb5 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PrefetchDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PrefetchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt new file mode 100644 index 0000000000..228c4047d2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PrependFromQueueAndPaddedBatchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PreventGradient.pbtxt b/tensorflow/core/api_def/python_api/api_def_PreventGradient.pbtxt new file mode 100644 index 0000000000..9565f5632b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PreventGradient.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PreventGradient" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantize.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantize.pbtxt new file mode 100644 index 
0000000000..d2468f1b24 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizeAndDequantize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt new file mode 100644 index 0000000000..15e181be20 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizeAndDequantizeV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV3.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV3.pbtxt new file mode 100644 index 0000000000..f1edc6f5fa --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV3.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizeAndDequantizeV3" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeDownAndShrinkRange.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeDownAndShrinkRange.pbtxt new file mode 100644 index 0000000000..9a2a86d25d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizeDownAndShrinkRange.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizeDownAndShrinkRange" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedAdd.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedAdd.pbtxt new file mode 100644 index 0000000000..b952d6eccb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedAdd.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedAdd" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt new file mode 100644 index 0000000000..e009ada553 --- 
/dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedBatchNormWithGlobalNormalization" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedBiasAdd.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedBiasAdd.pbtxt new file mode 100644 index 0000000000..3432962e59 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedBiasAdd.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedBiasAdd" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedConv2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedConv2D.pbtxt new file mode 100644 index 0000000000..2409d12abe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedConv2D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "QuantizedConv2D" + endpoint { + name: "nn.quantized_conv2d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedInstanceNorm.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedInstanceNorm.pbtxt new file mode 100644 index 0000000000..47a4931a05 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedInstanceNorm.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedInstanceNorm" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedMatMul.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedMatMul.pbtxt new file mode 100644 index 0000000000..3ca9d2ae07 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedMatMul.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedMatMul" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedMul.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedMul.pbtxt new file mode 100644 index 0000000000..c026fba194 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedMul.pbtxt @@ -0,0 +1,4 @@ 
+op { + graph_op_name: "QuantizedMul" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedRelu.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedRelu.pbtxt new file mode 100644 index 0000000000..e5da4f25f0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedRelu.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedRelu" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedRelu6.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedRelu6.pbtxt new file mode 100644 index 0000000000..ef1e648312 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedRelu6.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedRelu6" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedReshape.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedReshape.pbtxt new file mode 100644 index 0000000000..7e6d9ed718 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedReshape.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedReshape" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedResizeBilinear.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedResizeBilinear.pbtxt new file mode 100644 index 0000000000..a8da4128c2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedResizeBilinear.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedResizeBilinear" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QueueIsClosed.pbtxt b/tensorflow/core/api_def/python_api/api_def_QueueIsClosed.pbtxt new file mode 100644 index 0000000000..f1d2ef63f1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QueueIsClosed.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QueueIsClosed" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QueueIsClosedV2.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_QueueIsClosedV2.pbtxt new file mode 100644 index 0000000000..07cf1a7497 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QueueIsClosedV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QueueIsClosedV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RFFT.pbtxt b/tensorflow/core/api_def/python_api/api_def_RFFT.pbtxt new file mode 100644 index 0000000000..e9719255ae --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RFFT.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RFFT" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RFFT2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_RFFT2D.pbtxt new file mode 100644 index 0000000000..1336a64408 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RFFT2D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RFFT2D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RFFT3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_RFFT3D.pbtxt new file mode 100644 index 0000000000..978b5814ff --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RFFT3D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RFFT3D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RandomDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_RandomDataset.pbtxt new file mode 100644 index 0000000000..a5f6f8c6f1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RandomDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RandomDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RandomPoissonV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_RandomPoissonV2.pbtxt new file mode 100644 index 0000000000..8cc217c50e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RandomPoissonV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RandomPoissonV2" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_RangeDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_RangeDataset.pbtxt new file mode 100644 index 0000000000..4cd8296b22 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RangeDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RangeDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ReadVariableOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ReadVariableOp.pbtxt new file mode 100644 index 0000000000..e250b78eff --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ReadVariableOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ReadVariableOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RecordInput.pbtxt b/tensorflow/core/api_def/python_api/api_def_RecordInput.pbtxt new file mode 100644 index 0000000000..29f798050e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RecordInput.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RecordInput" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RefNextIteration.pbtxt b/tensorflow/core/api_def/python_api/api_def_RefNextIteration.pbtxt new file mode 100644 index 0000000000..f9dfcf5e97 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RefNextIteration.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RefNextIteration" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RefSelect.pbtxt b/tensorflow/core/api_def/python_api/api_def_RefSelect.pbtxt new file mode 100644 index 0000000000..8f9909aa86 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RefSelect.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RefSelect" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RefSwitch.pbtxt b/tensorflow/core/api_def/python_api/api_def_RefSwitch.pbtxt new file mode 100644 index 0000000000..68b0f4a694 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_RefSwitch.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RefSwitch" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RemoteCall.pbtxt b/tensorflow/core/api_def/python_api/api_def_RemoteCall.pbtxt new file mode 100644 index 0000000000..fc069d857d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RemoteCall.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RemoteCall" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RepeatDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_RepeatDataset.pbtxt new file mode 100644 index 0000000000..be301da838 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RepeatDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RepeatDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RequantizationRange.pbtxt b/tensorflow/core/api_def/python_api/api_def_RequantizationRange.pbtxt new file mode 100644 index 0000000000..e327595a38 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RequantizationRange.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RequantizationRange" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Requantize.pbtxt b/tensorflow/core/api_def/python_api/api_def_Requantize.pbtxt new file mode 100644 index 0000000000..f26f0611ba --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Requantize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Requantize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdadelta.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdadelta.pbtxt new file mode 100644 index 0000000000..e0413a67a3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdadelta.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyAdadelta" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagrad.pbtxt new file mode 100644 index 0000000000..52b8ba0b0e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagradDA.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagradDA.pbtxt new file mode 100644 index 0000000000..edfc0a733f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagradDA.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyAdagradDA" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdam.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdam.pbtxt new file mode 100644 index 0000000000..ca2713b533 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdam.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyAdam" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyAddSign.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAddSign.pbtxt new file mode 100644 index 0000000000..50dd643953 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAddSign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyAddSign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyCenteredRMSProp.pbtxt new file mode 100644 index 0000000000..20592e38c8 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyCenteredRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyCenteredRMSProp" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrl.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrl.pbtxt new file mode 100644 index 0000000000..72b49e09d6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrl.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyFtrl" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrlV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrlV2.pbtxt new file mode 100644 index 0000000000..af1d24c344 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrlV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyFtrlV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyGradientDescent.pbtxt new file mode 100644 index 0000000000..75d6afd426 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyMomentum.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyMomentum.pbtxt new file mode 100644 index 0000000000..3e499cf72e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyMomentum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyMomentum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyPowerSign.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyPowerSign.pbtxt new file mode 100644 index 0000000000..b23ad0d061 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyPowerSign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyPowerSign" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalAdagrad.pbtxt new file mode 100644 index 0000000000..6ad124c590 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyProximalAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalGradientDescent.pbtxt new file mode 100644 index 0000000000..d684a5dd67 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyProximalGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyRMSProp.pbtxt new file mode 100644 index 0000000000..c4c20e1382 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceCountUpTo.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceCountUpTo.pbtxt new file mode 100644 index 0000000000..87376b7447 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceCountUpTo.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceCountUpTo" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceGather.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceGather.pbtxt new file mode 100644 index 0000000000..714ba4a7ca --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceGather.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceGather" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ResourceScatterAdd.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterAdd.pbtxt new file mode 100644 index 0000000000..4d4601cafd --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterAdd.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterAdd" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceScatterNdUpdate.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterNdUpdate.pbtxt new file mode 100644 index 0000000000..54c66708ae --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterNdUpdate.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterNdUpdate" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceScatterUpdate.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterUpdate.pbtxt new file mode 100644 index 0000000000..30f885bee0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterUpdate.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterUpdate" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdadelta.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdadelta.pbtxt new file mode 100644 index 0000000000..a7e4dad138 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdadelta.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyAdadelta" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagrad.pbtxt new file mode 100644 index 0000000000..1388da789c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyAdagrad" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagradDA.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagradDA.pbtxt new file mode 100644 index 0000000000..c5beaa4f58 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagradDA.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyAdagradDA" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt new file mode 100644 index 0000000000..f3de3d93df --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyCenteredRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrl.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrl.pbtxt new file mode 100644 index 0000000000..f83833d351 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrl.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyFtrl" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrlV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrlV2.pbtxt new file mode 100644 index 0000000000..71adbb0bcd --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrlV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyFtrlV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyMomentum.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyMomentum.pbtxt new file mode 100644 index 0000000000..28a19caacc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyMomentum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: 
"ResourceSparseApplyMomentum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt new file mode 100644 index 0000000000..e8cda7f4ed --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyProximalAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt new file mode 100644 index 0000000000..5fa1ade669 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyProximalGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyRMSProp.pbtxt new file mode 100644 index 0000000000..86cc9a41ae --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceStridedSliceAssign.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceStridedSliceAssign.pbtxt new file mode 100644 index 0000000000..ef6e19fea0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceStridedSliceAssign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceStridedSliceAssign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RestoreV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_RestoreV2.pbtxt new file mode 100644 index 0000000000..34d07239a1 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_RestoreV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RestoreV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Roll.pbtxt b/tensorflow/core/api_def/python_api/api_def_Roll.pbtxt new file mode 100644 index 0000000000..9cc919f36f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Roll.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Roll" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SaveV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_SaveV2.pbtxt new file mode 100644 index 0000000000..617897ee44 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SaveV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SaveV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ScanDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ScanDataset.pbtxt new file mode 100644 index 0000000000..e71b655c22 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ScanDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ScanDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ScatterNdNonAliasingAdd.pbtxt b/tensorflow/core/api_def/python_api/api_def_ScatterNdNonAliasingAdd.pbtxt new file mode 100644 index 0000000000..ecf71cd625 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ScatterNdNonAliasingAdd.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ScatterNdNonAliasingAdd" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SerializeIterator.pbtxt b/tensorflow/core/api_def/python_api/api_def_SerializeIterator.pbtxt new file mode 100644 index 0000000000..07d2f200fe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SerializeIterator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SerializeIterator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SetSize.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_SetSize.pbtxt new file mode 100644 index 0000000000..ee9c71036b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SetSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SetSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ShuffleAndRepeatDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ShuffleAndRepeatDataset.pbtxt new file mode 100644 index 0000000000..7b0d2994f0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ShuffleAndRepeatDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ShuffleAndRepeatDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ShuffleDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ShuffleDataset.pbtxt new file mode 100644 index 0000000000..8f0be9197a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ShuffleDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ShuffleDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SkipDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_SkipDataset.pbtxt new file mode 100644 index 0000000000..96a551c5b6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SkipDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SkipDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorApplyGradient.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorApplyGradient.pbtxt new file mode 100644 index 0000000000..5e158c9ca0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorApplyGradient.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseAccumulatorApplyGradient" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorTakeGradient.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorTakeGradient.pbtxt new file mode 100644 index 0000000000..5326f23def 
--- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorTakeGradient.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseAccumulatorTakeGradient" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyAdadelta.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdadelta.pbtxt new file mode 100644 index 0000000000..d30a8676e0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdadelta.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyAdadelta" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagrad.pbtxt new file mode 100644 index 0000000000..cb5ddef212 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagradDA.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagradDA.pbtxt new file mode 100644 index 0000000000..c3b87b0953 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagradDA.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyAdagradDA" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyCenteredRMSProp.pbtxt new file mode 100644 index 0000000000..db47328738 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyCenteredRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyCenteredRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrl.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrl.pbtxt new file mode 100644 index 0000000000..14e37b8ba2 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrl.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyFtrl" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrlV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrlV2.pbtxt new file mode 100644 index 0000000000..0d307af9b4 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrlV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyFtrlV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyMomentum.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyMomentum.pbtxt new file mode 100644 index 0000000000..ed34c0485d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyMomentum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyMomentum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalAdagrad.pbtxt new file mode 100644 index 0000000000..ff2d3b6731 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyProximalAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalGradientDescent.pbtxt new file mode 100644 index 0000000000..f342a611bb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyProximalGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyRMSProp.pbtxt new file mode 100644 index 0000000000..7f337d50e5 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_SparseApplyRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseConditionalAccumulator.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseConditionalAccumulator.pbtxt new file mode 100644 index 0000000000..bad4120795 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseConditionalAccumulator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseConditionalAccumulator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseAdd.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseAdd.pbtxt new file mode 100644 index 0000000000..c5e7c9851f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseAdd.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseDenseCwiseAdd" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseDiv.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseDiv.pbtxt new file mode 100644 index 0000000000..f72031cf68 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseDiv.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseDenseCwiseDiv" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseMul.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseMul.pbtxt new file mode 100644 index 0000000000..a87004ee5f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseMul.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseDenseCwiseMul" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanGrad.pbtxt new file mode 100644 index 0000000000..771083cd51 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanGrad.pbtxt @@ 
-0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentMeanGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt new file mode 100644 index 0000000000..fcb029535c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentMeanWithNumSegments" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNGrad.pbtxt new file mode 100644 index 0000000000..0682a597bb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentSqrtNGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt new file mode 100644 index 0000000000..7311a093df --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentSqrtNWithNumSegments" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentSumWithNumSegments.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSumWithNumSegments.pbtxt new file mode 100644 index 0000000000..81c2b8554e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSumWithNumSegments.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentSumWithNumSegments" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSparseMaximum.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSparseMaximum.pbtxt new file mode 100644 index 0000000000..0dbadc01ed --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_SparseSparseMaximum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSparseMaximum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSparseMinimum.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSparseMinimum.pbtxt new file mode 100644 index 0000000000..0e3ffcbddf --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSparseMinimum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSparseMinimum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseTensorSliceDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseTensorSliceDataset.pbtxt new file mode 100644 index 0000000000..19c0c7f199 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseTensorSliceDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseTensorSliceDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseToSparseSetOperation.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseToSparseSetOperation.pbtxt new file mode 100644 index 0000000000..735ee18e14 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseToSparseSetOperation.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseToSparseSetOperation" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SqlDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_SqlDataset.pbtxt new file mode 100644 index 0000000000..2ab4c3e441 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SqlDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SqlDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Stage.pbtxt b/tensorflow/core/api_def/python_api/api_def_Stage.pbtxt new file mode 100644 index 0000000000..66de5901bc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Stage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Stage" + visibility: HIDDEN +} 
diff --git a/tensorflow/core/api_def/python_api/api_def_StageClear.pbtxt b/tensorflow/core/api_def/python_api/api_def_StageClear.pbtxt new file mode 100644 index 0000000000..f54a1c1c04 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StageClear.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StageClear" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StagePeek.pbtxt b/tensorflow/core/api_def/python_api/api_def_StagePeek.pbtxt new file mode 100644 index 0000000000..710394d30d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StagePeek.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StagePeek" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StageSize.pbtxt b/tensorflow/core/api_def/python_api/api_def_StageSize.pbtxt new file mode 100644 index 0000000000..472032ac42 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StageSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StageSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StatsAggregatorHandle.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatsAggregatorHandle.pbtxt new file mode 100644 index 0000000000..f7bed36602 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StatsAggregatorHandle.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StatsAggregatorHandle" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StatsAggregatorSummary.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatsAggregatorSummary.pbtxt new file mode 100644 index 0000000000..8b1bab2440 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StatsAggregatorSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StatsAggregatorSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StridedSliceAssign.pbtxt b/tensorflow/core/api_def/python_api/api_def_StridedSliceAssign.pbtxt new file mode 100644 index 0000000000..bcf1df228e --- 
/dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StridedSliceAssign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StridedSliceAssign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StridedSliceGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_StridedSliceGrad.pbtxt new file mode 100644 index 0000000000..05d7d57511 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StridedSliceGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StridedSliceGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_TFRecordDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_TFRecordDataset.pbtxt new file mode 100644 index 0000000000..3c270ada3c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_TFRecordDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "TFRecordDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_TakeDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_TakeDataset.pbtxt new file mode 100644 index 0000000000..711b335dc1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_TakeDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "TakeDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_TensorDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorDataset.pbtxt new file mode 100644 index 0000000000..5bc3920c56 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_TensorDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "TensorDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_TensorSliceDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorSliceDataset.pbtxt new file mode 100644 index 0000000000..89ad016483 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_TensorSliceDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "TensorSliceDataset" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_TextLineDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_TextLineDataset.pbtxt new file mode 100644 index 0000000000..08d785191b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_TextLineDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "TextLineDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Unstage.pbtxt b/tensorflow/core/api_def/python_api/api_def_Unstage.pbtxt new file mode 100644 index 0000000000..65eb756b87 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Unstage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Unstage" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_VarHandleOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_VarHandleOp.pbtxt new file mode 100644 index 0000000000..2c93a6db93 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_VarHandleOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "VarHandleOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_VarIsInitializedOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_VarIsInitializedOp.pbtxt new file mode 100644 index 0000000000..de5d9850ac --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_VarIsInitializedOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "VarIsInitializedOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_VariableShape.pbtxt b/tensorflow/core/api_def/python_api/api_def_VariableShape.pbtxt new file mode 100644 index 0000000000..9b317152dd --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_VariableShape.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "VariableShape" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ZipDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ZipDataset.pbtxt new file mode 100644 index 0000000000..dd1459521f --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_ZipDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ZipDataset" + visibility: HIDDEN +} -- GitLab From 8c557a579384e2665fd438a944fd416f544a2a81 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Wed, 28 Feb 2018 18:36:57 -0800 Subject: [PATCH 185/311] Use NodeExecStats's output_slot field to identify output instead of just using proto index. PiperOrigin-RevId: 187416101 --- tensorflow/core/common_runtime/step_stats_collector.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc index cb900db10a..f21536d586 100644 --- a/tensorflow/core/common_runtime/step_stats_collector.cc +++ b/tensorflow/core/common_runtime/step_stats_collector.cc @@ -226,13 +226,14 @@ void StepStatsCollector::BuildCostModel( if (node) { for (int i = 0; i < stats.output_size(); ++i) { const auto& output = stats.output(i); - cm->RecordMaxMemorySize(node, i, + int output_slot = output.slot(); + cm->RecordMaxMemorySize(node, output_slot, Bytes(output.tensor_description() .allocation_description() .allocated_bytes()), - stats.output(i).tensor_description().shape(), - node->output_types()[i]); - cm->RecordAllocationId(node, i, + output.tensor_description().shape(), + node->output_types()[output_slot]); + cm->RecordAllocationId(node, output_slot, output.tensor_description() .allocation_description() .allocation_id()); -- GitLab From af6cdb9e5eae7e5e41824336fa5b3084402d43e9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 18:56:01 -0800 Subject: [PATCH 186/311] Use half_val instead of int_val to get the bfloat16 tensor value in MakeNdarray. 
PiperOrigin-RevId: 187417908 --- tensorflow/python/framework/tensor_util.py | 9 ++++----- .../python/framework/tensor_util_test.py | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 27afaa074a..135562e831 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -559,16 +559,16 @@ def MakeNdarray(tensor): if tensor.tensor_content: return (np.frombuffer(tensor.tensor_content, dtype=dtype).copy() .reshape(shape)) - elif tensor_dtype == dtypes.float16: + elif tensor_dtype == dtypes.float16 or tensor_dtype == dtypes.bfloat16: # the half_val field of the TensorProto stores the binary representation # of the fp16: we need to reinterpret this as a proper float16 if len(tensor.half_val) == 1: tmp = np.array(tensor.half_val[0], dtype=np.uint16) - tmp.dtype = np.float16 + tmp.dtype = tensor_dtype.as_numpy_dtype return np.repeat(tmp, num_elements).reshape(shape) else: tmp = np.fromiter(tensor.half_val, dtype=np.uint16) - tmp.dtype = np.float16 + tmp.dtype = tensor_dtype.as_numpy_dtype return tmp.reshape(shape) elif tensor_dtype == dtypes.float32: if len(tensor.float_val) == 1: @@ -586,8 +586,7 @@ def MakeNdarray(tensor): return np.fromiter(tensor.double_val, dtype=dtype).reshape(shape) elif tensor_dtype in [ dtypes.int32, dtypes.uint8, dtypes.uint16, dtypes.int16, dtypes.int8, - dtypes.qint32, dtypes.quint8, dtypes.qint8, dtypes.qint16, dtypes.quint16, - dtypes.bfloat16 + dtypes.qint32, dtypes.quint8, dtypes.qint8, dtypes.qint16, dtypes.quint16 ]: if len(tensor.int_val) == 1: return np.repeat(np.array(tensor.int_val[0], dtype=dtype), diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py index 6b1b3dd40c..35fff80c61 100644 --- a/tensorflow/python/framework/tensor_util_test.py +++ b/tensorflow/python/framework/tensor_util_test.py @@ -235,6 
+235,26 @@ class TensorUtilTest(test.TestCase): self.assertEquals(np.float16, a.dtype) self.assertAllClose(np.array([10.0, 20.0], dtype=np.float16), a) + def testBfloat16(self): + test_type = dtypes.bfloat16.as_numpy_dtype + t = tensor_util.make_tensor_proto(np.array([10.0, 20.0], dtype=test_type)) + # 10.0: 16672 = 010000010(130) 0100000: (1+0/2+1/4) * 2^(130-127) + # 20.0: 16800 = 010000011(131) 0100000: (1+0/2+1/4) * 2^(131-127) + self.assertProtoEquals(""" + dtype: DT_BFLOAT16 + tensor_shape { + dim { + size: 2 + } + } + half_val: 16672 + half_val: 16800 + """, t) + + a = tensor_util.MakeNdarray(t) + self.assertEquals(test_type, a.dtype) + self.assertAllClose(np.array([10.0, 20.0], dtype=test_type), a) + def testInt(self): t = tensor_util.make_tensor_proto(10) self.assertProtoEquals(""" -- GitLab From 63646c32c629f750706c9c63f87735bdbcec4963 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 18:59:41 -0800 Subject: [PATCH 187/311] Add bfloat16 random_op for CPU. 
PiperOrigin-RevId: 187418131 --- tensorflow/core/kernels/random_op.cc | 1 + .../core/lib/random/random_distributions.h | 119 ++++++++++++++++++ .../lib/random/random_distributions_test.cc | 24 +++- 3 files changed, 142 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index 78ff7948fb..e37232539f 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -495,6 +495,7 @@ class RandomGammaOp : public OpKernel { RandomUniformIntOp); TF_CALL_half(REGISTER); +TF_CALL_bfloat16(REGISTER); TF_CALL_float(REGISTER); TF_CALL_double(REGISTER); TF_CALL_int32(REGISTER_INT); diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h index 3fe1f9bc6c..2ebe608fc9 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -32,6 +32,8 @@ namespace random { // Helper function to convert a 16-bit integer to a half between [0..1). PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x); +// Helper function to convert a 16-bit integer to a bfloat16 between [0..1). +PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x); // Helper function to convert a 32-bit integer to a float between [0..1). PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x); // Helper function to convert two 32-bit integers to a double between [0..1). @@ -75,6 +77,30 @@ class UniformDistribution { } }; +template +class UniformDistribution { + public: + // The number of elements that will be returned. + static const int kResultElementCount = Generator::kResultElementCount; + // Cost of generation of a single element (in cycles). + static const int kElementCost = 3; + // Indicate that this distribution may take variable number of samples + // during the runtime. 
+ static const bool kVariableSamplesPerOutput = false; + typedef Array ResultType; + typedef bfloat16 ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(Generator* gen) { + typename Generator::ResultType sample = (*gen)(); + ResultType result; + for (int i = 0; i < kResultElementCount; ++i) { + result[i] = Uint16ToGfloat16(sample[i]); + } + return result; + } +}; + template class UniformDistribution { public: @@ -305,6 +331,36 @@ class NormalDistribution { } }; +template +class NormalDistribution { + public: + // The number of elements that will be returned. + static const int kResultElementCount = Generator::kResultElementCount; + // Cost of generation of a single element (in cycles). + static const int kElementCost = 70; + // Indicate that this distribution may take variable number of samples + // during the runtime. + static const bool kVariableSamplesPerOutput = false; + typedef Array ResultType; + typedef bfloat16 ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(Generator* gen) { + typename Generator::ResultType sample = (*gen)(); + ResultType result; + static_assert(kResultElementCount % 2 == 0, + "kResultElementCount should be an even number"); + for (int i = 0; i < kResultElementCount; i += 2) { + float f[2]; + // Box-Muller transform requires processing 2 elements at a time. + BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]); + result[i] = bfloat16(f[0]); + result[i + 1] = bfloat16(f[1]); + } + return result; + } +}; + template class NormalDistribution { public: @@ -414,6 +470,48 @@ class TruncatedNormalDistribution { } }; +template +class TruncatedNormalDistribution { + public: + // The number of elements that will be returned. + static const int kResultElementCount = + SingleSampleGenerator::kNativeElementCount; + // Cost of generation of a single element (in cycles). + static const int kElementCost = 90; + // Indicate that this distribution may take variable number of samples + // during the runtime. 
+ static const bool kVariableSamplesPerOutput = true; + // The threshold where the normal distribution is truncated. + const float kTruncateValue = 2.0f; + + typedef Array ResultType; + typedef bfloat16 ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(SingleSampleGenerator* gen) { + ResultType results; + int index = 0; + while (true) { + // Repeatedly take samples from the normal distribution, until we have + // the desired number of elements that fall within the pre-defined cutoff + // threshold. + const uint32 x0 = (*gen)(); + const uint32 x1 = (*gen)(); + float f[2]; + BoxMullerFloat(x0, x1, &f[0], &f[1]); + + for (int i = 0; i < 2; ++i) { + if (Eigen::numext::abs(f[i]) < kTruncateValue) { + results[index++] = bfloat16(f[i]); + if (index >= kResultElementCount) { + return results; + } + } + } + } + } +}; + // Partial specialization for float. template class TruncatedNormalDistribution { @@ -567,6 +665,27 @@ PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) { return result - Eigen::half(1.0); } +// Helper function to convert an 16-bit integer to a bfloat16 between [0..1). +// This can create a uniform distribution of values between [0..1). +PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) { + // bfloat are formatted as follows (MSB first): + // sign(1) exponent(8) mantissa(7) + // Conceptually construct the following: + // sign == 0 + // exponent == 127 -- an excess 127 representation of a zero exponent + // mantissa == 7 random bits + const uint16 man = x & 0x7fu; // 7 bit mantissa + const uint16 exp = static_cast(127); + const uint16 val = (exp << 7) | man; + + bfloat16 result; + memcpy(&result, &val, sizeof(val)); + // The mantissa has an implicit leading 1, so the above code creates a value + // in [1, 2). The minus will not cause a rounding that makes the result 1. + // Instead it will just be close to 1. + return result - bfloat16(1.0); +} + // Helper function to convert an 32-bit integer to a float between [0..1). 
PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) { // IEEE754 floats are formatted as follows (MSB first): diff --git a/tensorflow/core/lib/random/random_distributions_test.cc b/tensorflow/core/lib/random/random_distributions_test.cc index 85d68f456e..8868672a10 100644 --- a/tensorflow/core/lib/random/random_distributions_test.cc +++ b/tensorflow/core/lib/random/random_distributions_test.cc @@ -37,6 +37,10 @@ namespace { // unit normal distribution, it should almost definitely never exceed 6. static constexpr float kZLimit = 6.0; +// As bfloat16 has much less precision, the largest z-value should be +// larger than for float32. +static constexpr float kZLimitBfloat16 = 20.0; + // A utility function to fill the given array with samples from the given // distribution, using the single adapter of the underlying generator template @@ -93,7 +97,7 @@ bool CheckSamplesMoments(const std::vector& samples, // mode, given the large number of samples. moments_data[i] += moment; ++moments_sample_count_data[i]; - moment *= samples_data[index]; + moment *= static_cast(samples_data[index]); } } @@ -125,7 +129,7 @@ bool CheckSamplesMoments(const std::vector& samples, const double z_test = fabs((moments[i] - moments_i_mean) / sqrt(total_variance)); - if (z_test > z_limit) { + if (z_test > static_cast(z_limit)) { LOG(ERROR) << "failing z_test:" << " moment: " << i << " stride: " << stride << " z_test: " << z_test << " z_limit: " << z_limit @@ -252,6 +256,22 @@ void RandomParametersMomentsTest(int count, int max_moments, } } +TEST(PhiloxRandomTest, UniformBfloat16MomentsTest) { + const std::vector strides = {0, 1, 4, 17}; + UniformMomentsTest(1 << 20, 40, strides, bfloat16(kZLimitBfloat16)); +} + +TEST(PhiloxRandomTest, NormalBfloat16MomentsTest) { + const std::vector strides = {0, 1, 4, 17}; + NormalMomentsTest(8 << 20, 25, strides, bfloat16(kZLimitBfloat16)); +} + +TEST(PhiloxRandomTest, RandomParametersBfloat16MomentsTest) { + const std::vector strides = {0, 1, 4, 17}; +
RandomParametersMomentsTest(1 << 20, 40, strides, + bfloat16(kZLimitBfloat16)); +} + TEST(PhiloxRandomTest, UniformFloatMomentsTest) { const std::vector strides = {0, 1, 4, 17}; UniformMomentsTest(1 << 20, 40, strides, kZLimit); -- GitLab From 16b4fbd56f1b460cefa41c6c50864c0245ecad91 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Wed, 28 Feb 2018 21:07:39 -0800 Subject: [PATCH 188/311] [XLA] Reshape/Transpose should not be bitcast if element type changes. PiperOrigin-RevId: 187427133 --- tensorflow/compiler/xla/shape_util.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 3152789016..9810e818f6 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -1076,6 +1076,10 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, CHECK(LayoutUtil::HasLayout(input_shape) && LayoutUtil::HasLayout(output_shape)); + if (!SameElementType(input_shape, output_shape)) { + return false; + } + // Padding is not handled. if (LayoutUtil::IsPadded(input_shape) && LayoutUtil::IsPadded(output_shape)) { return false; @@ -1106,6 +1110,10 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, CHECK(LayoutUtil::HasLayout(input_shape) && LayoutUtil::HasLayout(output_shape)); + if (!SameElementType(input_shape, output_shape)) { + return false; + } + // Padding is not handled. if (LayoutUtil::IsPadded(input_shape) || LayoutUtil::IsPadded(output_shape)) { return false; -- GitLab From 6c6bd9524764c1b15d2dc791f88f5de8cf0b51c1 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 28 Feb 2018 22:58:19 -0800 Subject: [PATCH 189/311] [tf.data] Add optional `shuffle` argument to `Dataset.list_files()`. This option makes it easier to shuffle a set of filenames on each iteration, and defaults to true to match the recommended best practices when training on a large dataset.
PiperOrigin-RevId: 187434282 --- .../list_files_dataset_op_test.py | 49 ++++++++++++++++--- tensorflow/python/data/ops/dataset_ops.py | 22 +++++++-- .../api/golden/tensorflow.data.-dataset.pbtxt | 2 +- ...ow.data.-fixed-length-record-dataset.pbtxt | 2 +- .../tensorflow.data.-t-f-record-dataset.pbtxt | 2 +- .../tensorflow.data.-text-line-dataset.pbtxt | 2 +- 6 files changed, 66 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py index 4e7691ee81..6442eb9ff5 100644 --- a/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py @@ -46,8 +46,9 @@ class ListFilesDatasetOpTest(test.TestCase): dataset = dataset_ops.Dataset.list_files(path.join(self.tmp_dir, '*')) with self.test_session() as sess: itr = dataset.make_one_shot_iterator() + next_element = itr.get_next() with self.assertRaises(errors.OutOfRangeError): - sess.run(itr.get_next()) + sess.run(next_element) def testSimpleDirectory(self): filenames = ['a', 'b', 'c'] @@ -56,13 +57,14 @@ class ListFilesDatasetOpTest(test.TestCase): dataset = dataset_ops.Dataset.list_files(path.join(self.tmp_dir, '*')) with self.test_session() as sess: itr = dataset.make_one_shot_iterator() + next_element = itr.get_next() full_filenames = [] produced_filenames = [] for filename in filenames: full_filenames.append( compat.as_bytes(path.join(self.tmp_dir, filename))) - produced_filenames.append(compat.as_bytes(sess.run(itr.get_next()))) + produced_filenames.append(compat.as_bytes(sess.run(next_element))) self.assertItemsEqual(full_filenames, produced_filenames) with self.assertRaises(errors.OutOfRangeError): sess.run(itr.get_next()) @@ -73,12 +75,13 @@ class ListFilesDatasetOpTest(test.TestCase): with self.test_session() as sess: itr = dataset.make_initializable_iterator() + next_element = itr.get_next() sess.run( itr.initializer, 
feed_dict={filename_placeholder: path.join(self.tmp_dir, '*')}) with self.assertRaises(errors.OutOfRangeError): - sess.run(itr.get_next()) + sess.run(next_element) def testSimpleDirectoryInitializer(self): filenames = ['a', 'b', 'c'] @@ -89,6 +92,7 @@ class ListFilesDatasetOpTest(test.TestCase): with self.test_session() as sess: itr = dataset.make_initializable_iterator() + next_element = itr.get_next() sess.run( itr.initializer, feed_dict={filename_placeholder: path.join(self.tmp_dir, '*')}) @@ -98,7 +102,7 @@ class ListFilesDatasetOpTest(test.TestCase): for filename in filenames: full_filenames.append( compat.as_bytes(path.join(self.tmp_dir, filename))) - produced_filenames.append(compat.as_bytes(sess.run(itr.get_next()))) + produced_filenames.append(compat.as_bytes(sess.run(next_element))) self.assertItemsEqual(full_filenames, produced_filenames) @@ -114,6 +118,7 @@ class ListFilesDatasetOpTest(test.TestCase): with self.test_session() as sess: itr = dataset.make_initializable_iterator() + next_element = itr.get_next() sess.run( itr.initializer, feed_dict={filename_placeholder: path.join(self.tmp_dir, '*.py')}) @@ -123,7 +128,7 @@ class ListFilesDatasetOpTest(test.TestCase): for filename in filenames[1:-1]: full_filenames.append( compat.as_bytes(path.join(self.tmp_dir, filename))) - produced_filenames.append(compat.as_bytes(sess.run(itr.get_next()))) + produced_filenames.append(compat.as_bytes(sess.run(next_element))) self.assertItemsEqual(full_filenames, produced_filenames) with self.assertRaises(errors.OutOfRangeError): @@ -138,6 +143,7 @@ class ListFilesDatasetOpTest(test.TestCase): with self.test_session() as sess: itr = dataset.make_initializable_iterator() + next_element = itr.get_next() sess.run( itr.initializer, feed_dict={filename_placeholder: path.join(self.tmp_dir, '*.py*')}) @@ -147,13 +153,44 @@ class ListFilesDatasetOpTest(test.TestCase): for filename in filenames[1:]: full_filenames.append( compat.as_bytes(path.join(self.tmp_dir, filename))) - 
produced_filenames.append(compat.as_bytes(sess.run(itr.get_next()))) + produced_filenames.append(compat.as_bytes(sess.run(next_element))) self.assertItemsEqual(full_filenames, produced_filenames) with self.assertRaises(errors.OutOfRangeError): sess.run(itr.get_next()) + def testNoShuffle(self): + filenames = ['a', 'b', 'c'] + self._touchTempFiles(filenames) + + # Repeat the list twice and ensure that the order is the same each time. + # NOTE(mrry): This depends on an implementation detail of `list_files()`, + # which is that the list of files is captured when the iterator is + # initialized. Otherwise, or if e.g. the iterator were initialized more than + # once, it's possible that the non-determinism of `tf.matching_files()` + # would cause this test to fail. However, it serves as a useful confirmation + # that the `shuffle=False` argument is working as intended. + # TODO(b/73959787): Provide some ordering guarantees so that this test is + # more meaningful. + dataset = dataset_ops.Dataset.list_files( + path.join(self.tmp_dir, '*'), shuffle=False).repeat(2) + with self.test_session() as sess: + itr = dataset.make_one_shot_iterator() + next_element = itr.get_next() + + full_filenames = [] + produced_filenames = [] + for filename in filenames * 2: + full_filenames.append( + compat.as_bytes(path.join(self.tmp_dir, filename))) + produced_filenames.append(compat.as_bytes(sess.run(next_element))) + with self.assertRaises(errors.OutOfRangeError): + sess.run(itr.get_next()) + self.assertItemsEqual(full_filenames, produced_filenames) + self.assertEqual(produced_filenames[:len(filenames)], + produced_filenames[len(filenames):]) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 5751f35fe1..7c5aa4c767 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -36,6 +36,7 @@ from tensorflow.python.framework import ops from 
tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gen_io_ops from tensorflow.python.ops import math_ops @@ -557,7 +558,7 @@ class Dataset(object): return PrefetchDataset(self, buffer_size) @staticmethod - def list_files(file_pattern): + def list_files(file_pattern, shuffle=None): """A dataset of all files matching a pattern. Example: @@ -570,16 +571,31 @@ class Dataset(object): - /path/to/dir/b.py - /path/to/dir/c.py - NOTE: The order of the file names returned can be non-deterministic. + NOTE: The order of the file names returned can be non-deterministic even + when `shuffle` is `False`. Args: file_pattern: A string or scalar string `tf.Tensor`, representing the filename pattern that will be matched. + shuffle: (Optional.) If `True`, the file names will be shuffled randomly. + Defaults to `True`. Returns: Dataset: A `Dataset` of strings corresponding to file names. """ - return Dataset.from_tensor_slices(gen_io_ops.matching_files(file_pattern)) + # TODO(b/73959787): Add a `seed` argument and make the `shuffle=False` + # behavior deterministic (e.g. by sorting the filenames). + if shuffle is None: + shuffle = True + matching_files = gen_io_ops.matching_files(file_pattern) + dataset = Dataset.from_tensor_slices(matching_files) + if shuffle: + # NOTE(mrry): The shuffle buffer size must be greater than zero, but the + # list of files might be empty. + buffer_size = math_ops.maximum( + array_ops.shape(matching_files, out_type=dtypes.int64)[0], 1) + dataset = dataset.shuffle(buffer_size) + return dataset def repeat(self, count=None): """Repeats this dataset `count` times. 
diff --git a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt index 42de5c0c80..0900adaf76 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt @@ -64,7 +64,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "make_initializable_iterator" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt index e2fc8d6cb1..7b16ac90c9 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt @@ -65,7 +65,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "make_initializable_iterator" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt index 709ec127ce..9cf5f2ae20 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt @@ -65,7 +65,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "make_initializable_iterator" diff --git 
a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt index 7263230c1c..8c3d669143 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt @@ -65,7 +65,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "make_initializable_iterator" -- GitLab From 65011bd51dcae889e631c6db46e7bcbf0d6843d1 Mon Sep 17 00:00:00 2001 From: Penghao Cen Date: Thu, 1 Mar 2018 16:16:37 +0800 Subject: [PATCH 190/311] Add default whl file location and minor update comments --- tensorflow/tools/dist_test/README.md | 8 ++++++++ tensorflow/tools/dist_test/local_test.sh | 22 ++++++++-------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/tensorflow/tools/dist_test/README.md b/tensorflow/tools/dist_test/README.md index c1b1f79bbd..228d5ee35d 100644 --- a/tensorflow/tools/dist_test/README.md +++ b/tensorflow/tools/dist_test/README.md @@ -17,6 +17,14 @@ cesnsu model: ./local_test.sh --model_name CENSUS_WIDENDEEP +You can test a specific version of TensorFlow: + +```shell +./local_test.sh ${whl_file_url} +``` + +For example, you can find these TensorFlow python package URLs from [here](https://www.tensorflow.org/install/install_linux#the_url_of_the_tensorflow_python_package) for Ubuntu.
+ **2) Launch a remote k8s cluster on Google Kubernetes Engine (GKE) and run the test suite on it** diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index 435f9d0dc9..caae7fd530 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -16,12 +16,11 @@ # # Tests distributed TensorFlow on a locally running TF GRPC cluster. # -# This script peforms the following steps: -# 1) Build the docker-in-docker (dind) image capable of running docker and -# Kubernetes (k8s) cluster inside. +# This script performs the following steps: +# 1) Build the docker image capable of running distributed TensorFlow in docker. # 2) Run a container from the aforementioned image and start docker service # in it -# 3) Call a script to launch a k8s TensorFlow GRPC cluster inside the container +# 3) Call a script to launch a distributed TensorFlow GRPC cluster inside the container # and run the distributed test suite. # # Usage: local_test.sh @@ -64,15 +63,9 @@ die() { # Configurations DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster" -LOCAL_K8S_CACHE=${HOME}/kubernetes -# Helper function -get_container_id_by_image_name() { - # Get the id of a container by image name - # Usage: get_docker_container_id_by_image_name - - docker ps | grep $1 | awk '{print $1}' -} +# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below +DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl" # Parse input arguments LEAVE_CONTAINER_RUNNING=0 @@ -84,7 +77,8 @@ SYNC_REPLICAS_FLAG="" WHL_FILE_LOCATION=${1} if [[ -z "${WHL_FILE_LOCATION}" ]]; then - die "whl file location is not specified" + WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION} + echo "use default whl file location" fi while true; do @@ -121,7 +115,7 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # Get utility functions source ${DIR}/scripts/utils.sh -# 
Build docker-in-docker image for local k8s cluster. +# Build docker image for local distributed TensorFlow cluster. NO_CACHE_FLAG="" if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] && [[ "${TF_DIST_DOCKER_NO_CACHE}" != "0" ]]; then -- GitLab From 46355f9065967dd39cd340b17d91a91f70d2c0c1 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Thu, 1 Mar 2018 01:44:33 -0800 Subject: [PATCH 191/311] Ensure folding of batch norms is idempotent. Added more rigorous testing. (Also fixed a couple of naming nits in the code as I looked through) PiperOrigin-RevId: 187446976 --- .../quantize/python/fold_batch_norms.py | 40 +++++++++++++------ .../python/quantize_parameterized_test.py | 23 ++++++++--- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 75d9eb0e58..1f0648bbb6 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -194,7 +194,7 @@ def _FindFusedBatchNorms(graph): layer_op = match_result.get_op(layer_pattern) layer_tensor = match_result.get_tensor(layer_pattern) bn_op = match_result.get_op(batch_norm_pattern) - batch_epsilon_tensor = bn_op.get_attr('epsilon') + batch_epsilon = bn_op.get_attr('epsilon') # In the MatMul case, the output of batch norm is reshaped back into a # 2D tensor, so the output_tensor is the output of the Reshape op. @@ -207,6 +207,11 @@ def _FindFusedBatchNorms(graph): continue output_tensor = output_reshape_op.outputs[0] + # Ensure that the output tensor has consumers, otherwise this is a dangling + # node and not a match. 
+ if not output_tensor.consumers(): + continue + input_tensor = match_result.get_tensor(input_pattern) weight_tensor = match_result.get_tensor(weight_pattern) gamma_tensor = match_result.get_tensor(gamma_pattern) @@ -270,7 +275,7 @@ def _FindFusedBatchNorms(graph): moving_variance_tensor=moving_variance_tensor, bn_decay_mean_tensor=bn_decay_mean_tensor, bn_decay_var_tensor=bn_decay_var_tensor, - batch_epsilon_tensor=batch_epsilon_tensor) + batch_epsilon=batch_epsilon) def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, @@ -313,9 +318,8 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, g = ops.get_default_graph() with g.name_scope(context + '/batch_norm_correction'): recip_sigma_mv = math_ops.rsqrt( - match.moving_variance_tensor + match.batch_epsilon_tensor) - recip_sigma = math_ops.rsqrt( - match.variance_tensor + match.batch_epsilon_tensor) + match.moving_variance_tensor + match.batch_epsilon) + recip_sigma = math_ops.rsqrt(match.variance_tensor + match.batch_epsilon) correction_scale = math_ops.divide( recip_sigma_mv, recip_sigma, name='scale_compute') correction_scale = array_ops.identity( @@ -434,6 +438,9 @@ def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay): for bn in common.BatchNormGroups(graph): has_scaling = _HasScaling(graph, input_to_ops_map, bn) + if not _IsValidUnfusedBatchNorm(graph, bn): + continue + # The mangling code intimately depends on BatchNorm node's internals. 
original_op, folded_op = _CreateFoldedOp( graph, @@ -462,6 +469,15 @@ def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay): raise ValueError('Unexpected inputs to op: %s' % add_bypass.name) +def _IsValidUnfusedBatchNorm(graph, context): + """Checks that the output of the unfused batch norm has consumers.""" + add_shift = graph.get_operation_by_name( + context + '/BatchNorm/batchnorm/add_1') + # Ensure that the output tensor of batch norm has consumers, otherwise this + # is a dangling node and not a match. + return bool(add_shift.outputs[0].consumers()) + + def _GetBatchNormParams(graph, context, has_scaling): """Extracts relevant tensors for folding batch norms. @@ -478,7 +494,7 @@ def _GetBatchNormParams(graph, context, has_scaling): batch_variance_tensor = None moving_mean_tensor = None moving_variance_tensor = None - batch_epsilon_tensor = None + batch_epsilon = None bn_decay_mean_tensor = None bn_decay_var_tensor = None @@ -509,7 +525,7 @@ def _GetBatchNormParams(graph, context, has_scaling): if op.name.endswith(op_suffix_moving_variance): moving_variance_tensor = graph.get_tensor_by_name(op.name + ':0') if op.name.endswith(op_suffix_epsilon): - batch_epsilon_tensor = graph.get_tensor_by_name(op.name + ':0') + batch_epsilon = graph.get_tensor_by_name(op.name + ':0') if op.name.endswith(op_suffix_bn_decay_mean): bn_decay_mean_tensor = graph.get_tensor_by_name(op.name + ':0') if op.name.endswith(op_suffix_bn_decay_var): @@ -535,7 +551,7 @@ def _GetBatchNormParams(graph, context, has_scaling): moving_variance_tensor=moving_variance_tensor, bn_decay_mean_tensor=bn_decay_mean_tensor, bn_decay_var_tensor=bn_decay_var_tensor, - batch_epsilon_tensor=batch_epsilon_tensor) + batch_epsilon=batch_epsilon) def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay, @@ -816,7 +832,7 @@ class _BatchNormMatch(object): def __init__(self, layer_op, bn_op, output_tensor, input_tensor, weight_tensor, gamma_tensor, beta_tensor, mean_tensor, 
variance_tensor, moving_mean_tensor, moving_variance_tensor, - bn_decay_mean_tensor, bn_decay_var_tensor, batch_epsilon_tensor): + bn_decay_mean_tensor, bn_decay_var_tensor, batch_epsilon): self._layer_op = layer_op self._bn_op = bn_op self._output_tensor = output_tensor @@ -830,7 +846,7 @@ class _BatchNormMatch(object): self._moving_variance_tensor = moving_variance_tensor self._bn_decay_mean_tensor = bn_decay_mean_tensor self._bn_decay_var_tensor = bn_decay_var_tensor - self._batch_epsilon_tensor = batch_epsilon_tensor + self._batch_epsilon = batch_epsilon @property def layer_op(self): @@ -877,8 +893,8 @@ class _BatchNormMatch(object): return self._moving_variance_tensor @property - def batch_epsilon_tensor(self): - return self._batch_epsilon_tensor + def batch_epsilon(self): + return self._batch_epsilon @property def bn_decay_mean_tensor(self): diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index 639a7454a9..dd73f6c860 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -87,8 +87,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): update_barrier = control_flow_ops.no_op(name='update_barrier') with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph, True, quant_delay=delay) + quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + quantization_node_name) @@ -130,6 +130,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) def testQuantize_Conv2dWithoutBatchNorm(self): self._RunWithoutBatchNormTestOverParameters( @@ -163,7 +164,6 @@ class 
QuantizeTest(test_util.TensorFlowTestCase): update_barrier = control_flow_ops.no_op(name='update_barrier') with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' @@ -205,6 +205,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) def testQuantize_FCWithoutBatchNorm(self): self._RunWithoutBatchNormTestOverParameters( @@ -239,7 +240,6 @@ class QuantizeTest(test_util.TensorFlowTestCase): update_barrier = control_flow_ops.no_op(name='update_barrier') with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' @@ -282,6 +282,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) def testQuantize_DepthwiseConv2dWithoutBatchNorm(self): self._RunWithoutBatchNormTestOverParameters( @@ -364,7 +365,6 @@ class QuantizeTest(test_util.TensorFlowTestCase): array_ops.identity(node, name='control_dependency') fold_batch_norms.FoldBatchNorms(graph, is_training=True) - quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' @@ -404,6 +404,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) def testQuantize_FCWithBatchNorm(self): 
self._RunBatchNormTestOverParameters(self._TestQuantize_FCWithBatchNorm) @@ -487,6 +488,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) def testQuantize_DepthwiseConv2dWithBatchNorm(self): self._RunBatchNormTestOverParameters( @@ -535,8 +537,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): array_ops.identity(node, name='control_dependency') fold_batch_norms.FoldBatchNorms(graph, is_training=True) - quantize.Quantize(graph, True, quant_delay=delay) + quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + quantization_node_name) @@ -574,6 +576,17 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) + + def _TestIdempotent(self, graph): + # Ensure that calling the rewrite again doesn't change the graph. + graph_def_before = str(graph.as_graph_def()) + with graph.as_default(): + # Ensuring that calling the rewrite again doesn't add more nodes. + fold_batch_norms.FoldBatchNorms(graph, is_training=True) + quantize.Quantize(graph, True) + graph_def_after = str(graph.as_graph_def()) + self.assertEqual(graph_def_before, graph_def_after) def _BatchNormParams(self, fused=False): return {'center': True, 'scale': True, 'decay': 1.0 - 0.003, 'fused': fused} -- GitLab From 2b7a7ee30666d160929c9aa3e941fbc94c17cc52 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 06:03:38 -0800 Subject: [PATCH 192/311] Add RegexReplace Op that internally calls RE2::Replace. 
PiperOrigin-RevId: 187467840 --- .../base_api/api_def_RegexReplace.pbtxt | 25 ++++++ tensorflow/core/kernels/BUILD | 8 ++ tensorflow/core/kernels/regex_replace_op.cc | 76 +++++++++++++++++++ tensorflow/core/ops/string_ops.cc | 14 ++++ tensorflow/python/kernel_tests/BUILD | 12 +++ .../kernel_tests/regex_replace_op_test.py | 71 +++++++++++++++++ tensorflow/python/ops/string_ops.py | 2 + tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + 8 files changed, 212 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_RegexReplace.pbtxt create mode 100644 tensorflow/core/kernels/regex_replace_op.cc create mode 100644 tensorflow/python/kernel_tests/regex_replace_op_test.py diff --git a/tensorflow/core/api_def/base_api/api_def_RegexReplace.pbtxt b/tensorflow/core/api_def/base_api/api_def_RegexReplace.pbtxt new file mode 100644 index 0000000000..70ad521926 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RegexReplace.pbtxt @@ -0,0 +1,25 @@ +op { + graph_op_name: "RegexReplace" + in_arg { + name: "input" + description: "The text to be processed." + } + in_arg { + name: "pattern" + description: "The regular expression to match the input." + } + in_arg { + name: "rewrite" + description: "The rewrite to be applied to the matched expression." + } + out_arg { + name: "output" + description: "The text after applying pattern and rewrite." + } + attr { + name: "replace_global" + description: "If True, the replacement is global, otherwise the replacement\nis done only on the first match." + } + summary: "Replaces the match of pattern in input with rewrite."
+ description: "It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)" +} diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 3426cf6e40..feacee5d63 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4155,6 +4155,7 @@ cc_library( ":as_string_op", ":base64_ops", ":reduce_join_op", + ":regex_replace_op", ":string_join_op", ":string_split_op", ":string_to_hash_bucket_op", @@ -4189,6 +4190,12 @@ tf_kernel_library( deps = STRING_DEPS, ) +tf_kernel_library( + name = "regex_replace_op", + prefix = "regex_replace_op", + deps = STRING_DEPS + ["@com_googlesource_code_re2//:re2"], +) + tf_kernel_library( name = "string_split_op", prefix = "string_split_op", @@ -5063,6 +5070,7 @@ filegroup( "scatter_nd_op*", "mutex_ops.*", "batch_kernels.*", + "regex_replace_op.cc", ], ), visibility = ["//visibility:public"], diff --git a/tensorflow/core/kernels/regex_replace_op.cc b/tensorflow/core/kernels/regex_replace_op.cc new file mode 100644 index 0000000000..59ec854a79 --- /dev/null +++ b/tensorflow/core/kernels/regex_replace_op.cc @@ -0,0 +1,76 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include + +#include "re2/re2.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +class RegexReplaceOp : public OpKernel { + public: + explicit RegexReplaceOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("replace_global", &replace_global_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor* input_tensor; + OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); + const auto& input_flat = input_tensor->flat(); + + const Tensor* pattern_tensor; + OP_REQUIRES_OK(ctx, ctx->input("pattern", &pattern_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(pattern_tensor->shape()), + errors::InvalidArgument("Pattern must be scalar, but received ", + pattern_tensor->shape().DebugString())); + const string pattern = pattern_tensor->flat()(0); + const RE2 match(pattern); + OP_REQUIRES(ctx, match.ok(), + errors::InvalidArgument("Invalid pattern: ", pattern, + ", error: ", match.error())); + + const Tensor* rewrite_tensor; + OP_REQUIRES_OK(ctx, ctx->input("rewrite", &rewrite_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(rewrite_tensor->shape()), + errors::InvalidArgument("Rewrite must be scalar, but received ", + rewrite_tensor->shape().DebugString())); + const string rewrite = rewrite_tensor->flat()(0); + + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output("output", input_tensor->shape(), + &output_tensor)); + auto output_flat = output_tensor->flat(); + for (size_t i = 0; i < input_flat.size(); ++i) { + output_flat(i) = input_flat(i); + if (replace_global_) { + RE2::GlobalReplace(&output_flat(i), match, rewrite); + } else { + RE2::Replace(&output_flat(i), match, rewrite); + } + } + } + + private: + bool replace_global_; +}; + 
+REGISTER_KERNEL_BUILDER(Name("RegexReplace").Device(DEVICE_CPU), + RegexReplaceOp); + +} // namespace tensorflow diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index e4c5bcfb54..05f216a83e 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -23,6 +23,20 @@ using shape_inference::DimensionHandle; using shape_inference::InferenceContext; using shape_inference::ShapeHandle; +REGISTER_OP("RegexReplace") + .Input("input: string") + .Input("pattern: string") + .Input("rewrite: string") + .Output("output: string") + .Attr("replace_global: bool = true") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(0, c->input(0)); + return Status::OK(); + }); + REGISTER_OP("StringToHashBucketFast") .Input("input: string") .Output("output: int64") diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index c9aa4a252d..0f13e8bba5 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -712,6 +712,18 @@ cuda_py_test( ], ) +tf_py_test( + name = "regex_replace_op_test", + size = "small", + srcs = ["regex_replace_op_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:string_ops", + ], +) + tf_py_test( name = "save_restore_ops_test", size = "small", diff --git a/tensorflow/python/kernel_tests/regex_replace_op_test.py b/tensorflow/python/kernel_tests/regex_replace_op_test.py new file mode 100644 index 0000000000..6739ac3224 --- /dev/null +++ b/tensorflow/python/kernel_tests/regex_replace_op_test.py @@ -0,0 +1,71 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for RegexReplace op from string_ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import string_ops +from tensorflow.python.platform import test + + +class RegexReplaceOpTest(test.TestCase): + + def testRemovePrefix(self): + values = ["a:foo", "a:bar", "a:foo", "b:baz", "b:qux", "ca:b"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + stripped = string_ops.regex_replace( + input_vector, "^(a:|b:)", "", replace_global=False).eval() + self.assertAllEqual([b"foo", b"bar", b"foo", b"baz", b"qux", b"ca:b"], + stripped) + + def testRegexReplace(self): + values = ["aba\naba", "abcdabcde"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + stripped = string_ops.regex_replace(input_vector, "a.*a", "(\\0)").eval() + self.assertAllEqual([b"(aba)\n(aba)", b"(abcda)bcde"], stripped) + + def testEmptyMatch(self): + values = ["abc", "1"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + stripped = string_ops.regex_replace(input_vector, "", "x").eval() + self.assertAllEqual([b"xaxbxcx", b"x1x"], stripped) + + def 
testInvalidPattern(self): + values = ["abc", "1"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + invalid_pattern = "A[" + replace = string_ops.regex_replace(input_vector, invalid_pattern, "x") + with self.assertRaisesOpError("Invalid pattern"): + replace.eval() + + def testGlobal(self): + values = ["ababababab", "abcabcabc", ""] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + stripped = string_ops.regex_replace(input_vector, "ab", "abc", + True).eval() + self.assertAllEqual([b"abcabcabcabcabc", b"abccabccabcc", b""], stripped) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index 0335d2456a..5bd75b9215 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -17,6 +17,7 @@ See the @{$python/string_ops} guide. +@@regex_replace @@string_to_hash_bucket_fast @@string_to_hash_bucket_strong @@string_to_hash_bucket @@ -139,6 +140,7 @@ def reduce_join(inputs, axis=None, reduce_join.__doc__ = deprecation.rewrite_argument_docstring( gen_string_ops.reduce_join.__doc__, "reduction_indices", "axis") +ops.NotDifferentiable("RegexReplace") ops.NotDifferentiable("StringToHashBucket") ops.NotDifferentiable("StringToHashBucketFast") ops.NotDifferentiable("StringToHashBucketStrong") diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 2333736583..8c9e7af89b 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1600,6 +1600,10 @@ tf_module { name: "reduce_sum" argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } + member_method { + name: "regex_replace" + argspec: "args=[\'input\', \'pattern\', \'rewrite\', 
\'replace_global\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " + } member_method { name: "register_tensor_conversion_function" argspec: "args=[\'base_type\', \'conversion_func\', \'priority\'], varargs=None, keywords=None, defaults=[\'100\'], " -- GitLab From 8a06526e9ac4cd47c14975bd75640966bd11daf9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 06:18:11 -0800 Subject: [PATCH 193/311] Update ops-related pbtxt files. PiperOrigin-RevId: 187468981 --- .../core/ops/compat/ops_history.v1.pbtxt | 26 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 26 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index dddde1624a..35c49658b3 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -37666,6 +37666,32 @@ op { } allows_uninitialized_input: true } +op { + name: "RegexReplace" + input_arg { + name: "input" + type: DT_STRING + } + input_arg { + name: "pattern" + type: DT_STRING + } + input_arg { + name: "rewrite" + type: DT_STRING + } + output_arg { + name: "output" + type: DT_STRING + } + attr { + name: "replace_global" + type: "bool" + default_value { + b: true + } + } +} op { name: "Relu" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 55be0519a7..bf7682712c 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -19353,6 +19353,32 @@ op { } allows_uninitialized_input: true } +op { + name: "RegexReplace" + input_arg { + name: "input" + type: DT_STRING + } + input_arg { + name: "pattern" + type: DT_STRING + } + input_arg { + name: "rewrite" + type: DT_STRING + } + output_arg { + name: "output" + type: DT_STRING + } + attr { + name: "replace_global" + type: "bool" + default_value { + b: true + } + } +} op { name: "Relu" input_arg { -- GitLab From 
166980803009ec4577806b4437579159f5e9dd5a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 06:25:38 -0800 Subject: [PATCH 194/311] Support 0 size literals in Literal::Slice PiperOrigin-RevId: 187469563 --- tensorflow/compiler/xla/literal_util.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index c3eb8caa57..a345e95a8b 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -813,7 +813,7 @@ std::unique_ptr Literal::Slice( CHECK_GE(start_indices[dnum], 0); CHECK_LE(limit_indices[dnum], shape().dimensions(dnum)); int64 dimension = limit_indices[dnum] - start_indices[dnum]; - CHECK_GT(dimension, 0); + CHECK_GE(dimension, 0); result_dimensions.push_back(dimension); } const auto result_shape = -- GitLab From bf048d60fbf68fd731df6b2f2ff36a5722b73bb8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 06:45:58 -0800 Subject: [PATCH 195/311] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 187471483 --- tensorflow/go/op/wrappers.go | 1486 +++++++++++++++++----------------- 1 file changed, 743 insertions(+), 743 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index d9e684a661..336df7c2f7 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -384,122 +384,6 @@ func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs t return op.Output(0), op.Output(1), op.Output(2) } -// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2. -type MutableHashTableOfTensorsV2Attr func(optionalAttr) - -// MutableHashTableOfTensorsV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. 
-// If not specified, defaults to "" -func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// If not specified, defaults to false -func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } -} - -// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value. -// If not specified, defaults to <> -func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["value_shape"] = value - } -} - -// Creates an empty hash table. -// -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a vector. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. -// -// Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. -// -// Returns Handle to a table. 
-func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MutableHashTableOfTensorsV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad. -type ResourceApplyProximalAdagradAttr func(optionalAttr) - -// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate. -// -// accum += grad * grad -// prox_v = var - lr * grad * (1 / sqrt(accum)) -// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. 
-func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyProximalAdagrad", - Input: []tf.Input{ - var_, accum, lr, l1, l2, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - // MutableHashTableV2Attr is an optional argument to MutableHashTableV2. type MutableHashTableV2Attr func(optionalAttr) @@ -564,142 +448,6 @@ func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.Data return op.Output(0) } -// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. -type MapUnstageNoKeyAttr func(optionalAttr) - -// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapUnstageNoKeyContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes and returns a random (key, value) -// -// from the underlying container. If the underlying container -// does not contain elements, the op will block until it does. -func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapUnstageNoKey", - Input: []tf.Input{ - indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - key = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapUnstageNoKey", err) - return - } - return key, values -} - -// HashTableV2Attr is an optional argument to HashTableV2. -type HashTableV2Attr func(optionalAttr) - -// HashTableV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func HashTableV2Container(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// HashTableV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func HashTableV2SharedName(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. 
-// -// value: If true and shared_name is empty, the table is shared -// using the node name. -// If not specified, defaults to false -func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } -} - -// Creates a non-initialized hash table. -// -// This op creates a hash table, specifying the type of its keys and values. -// Before using the table you will have to initialize it. After initialization the -// table will be immutable. -// -// Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. -// -// Returns Handle to a table. -func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "HashTableV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Replaces the contents of the table with the specified keys and values. // // The tensor `keys` must be of the same type as the keys of the table. @@ -5642,113 +5390,8 @@ func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_f return op.Output(0), op.Output(1), op.Output(2) } -// SummaryWriterAttr is an optional argument to SummaryWriter. -type SummaryWriterAttr func(optionalAttr) - -// SummaryWriterSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func SummaryWriterSharedName(value string) SummaryWriterAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// SummaryWriterContainer sets the optional container attribute to value. 
-// If not specified, defaults to "" -func SummaryWriterContainer(value string) SummaryWriterAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// Returns a handle to be used to access a summary writer. -// -// The summary writer is an in-graph resource which can be used by ops to write -// summaries to event files. -// -// Returns the summary writer resource. Scalar handle. -func SummaryWriter(scope *Scope, optional ...SummaryWriterAttr) (writer tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SummaryWriter", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes gradients for SparseSegmentMean. -// -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. -// -// Arguments: -// grad: gradient propagated to the SparseSegmentMean op. -// indices: indices passed to the corresponding SparseSegmentMean op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. -func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentMeanGrad", - Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Applies softmax to a batched N-D `SparseTensor`. -// -// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` -// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. 
-// -// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost -// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly -// zero elements do not participate*. Specifically, the algorithm is equivalent -// to the following: -// -// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix -// with shape `[B, C]`, along the size-C dimension; -// (2) Masks out the original implicitly-zero locations; -// (3) Renormalizes the remaining elements. -// -// Hence, the `SparseTensor` result has exactly the same non-zero indices and -// shape. -// -// Arguments: -// sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a -// SparseTensor, in canonical ordering. -// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// -// Returns 1-D. The `NNZ` values for the result `SparseTensor`. -func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSoftmax", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RandomPoissonAttr is an optional argument to RandomPoisson. -type RandomPoissonAttr func(optionalAttr) +// RandomPoissonAttr is an optional argument to RandomPoisson. +type RandomPoissonAttr func(optionalAttr) // RandomPoissonSeed sets the optional seed attribute to value. // If not specified, defaults to 0 @@ -7025,67 +6668,32 @@ func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyReso return scope.AddOperation(opspec) } -// CumprodAttr is an optional argument to Cumprod. -type CumprodAttr func(optionalAttr) +// SummaryWriterAttr is an optional argument to SummaryWriter. +type SummaryWriterAttr func(optionalAttr) -// CumprodExclusive sets the optional exclusive attribute to value. 
-// -// value: If `True`, perform exclusive cumprod. -// If not specified, defaults to false -func CumprodExclusive(value bool) CumprodAttr { +// SummaryWriterSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func SummaryWriterSharedName(value string) SummaryWriterAttr { return func(m optionalAttr) { - m["exclusive"] = value + m["shared_name"] = value } } -// CumprodReverse sets the optional reverse attribute to value. -// -// value: A `bool` (default: False). -// If not specified, defaults to false -func CumprodReverse(value bool) CumprodAttr { +// SummaryWriterContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func SummaryWriterContainer(value string) SummaryWriterAttr { return func(m optionalAttr) { - m["reverse"] = value + m["container"] = value } } -// Compute the cumulative product of the tensor `x` along `axis`. -// -// By default, this op performs an inclusive cumprod, which means that the first -// element of the input is identical to the first element of the output: -// -// ```python -// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] -// ``` -// -// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is -// performed instead: -// -// ```python -// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] -// ``` -// -// By setting the `reverse` kwarg to `True`, the cumprod is performed in the -// opposite direction: -// -// ```python -// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] -// ``` -// -// This is more efficient than using separate `tf.reverse` ops. -// -// The `reverse` and `exclusive` kwargs can also be combined: +// Returns a handle to be used to access a summary writer. // -// ```python -// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] -// ``` +// The summary writer is an in-graph resource which can be used by ops to write +// summaries to event files. // -// Arguments: -// x: A `Tensor`. 
Must be one of the following types: `float32`, `float64`, -// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, -// `complex128`, `qint8`, `quint8`, `qint32`, `half`. -// axis: A `Tensor` of type `int32` (default: 0). Must be in the range -// `[-rank(x), rank(x))`. -func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { +// Returns the summary writer resource. Scalar handle. +func SummaryWriter(scope *Scope, optional ...SummaryWriterAttr) (writer tf.Output) { if scope.Err() != nil { return } @@ -7094,93 +6702,347 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "Cumprod", - Input: []tf.Input{ - x, axis, - }, + Type: "SummaryWriter", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the mean along segments of a tensor. -// -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. -// -// Computes a tensor such that -// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is -// over `j` such that `segment_ids[j] == i` and `N` is the total number of -// values summed. -// -// If the mean is empty for a given segment ID `i`, `output[i] = 0`. +// Computes gradients for SparseSegmentMean. // -//
-// -//
+// Returns tensor "output" with same shape as grad, except for dimension 0 whose +// value is output_dim0. // // Arguments: -// -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// grad: gradient propagated to the SparseSegmentMean op. +// indices: indices passed to the corresponding SparseSegmentMean op. +// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. +// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. +func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SegmentMean", + Type: "SparseSegmentMeanGrad", Input: []tf.Input{ - data, segment_ids, + grad, indices, segment_ids, output_dim0, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp. -type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr) - -// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var, mg, ms, and mom tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the centered RMSProp algorithm. +// Applies softmax to a batched N-D `SparseTensor`. 
// -// The centered RMSProp algorithm uses an estimate of the centered second moment -// (i.e., the variance) for normalization, as opposed to regular RMSProp, which -// uses the (uncentered) second moment. This often helps with training, but is -// slightly more expensive in terms of computation and memory. +// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` +// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. // -// Note that in dense implementation of this algorithm, mg, ms, and mom will -// update even if the grad is zero, but in this sparse implementation, mg, ms, -// and mom will not update in iterations during which the grad is zero. +// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost +// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly +// zero elements do not participate*. Specifically, the algorithm is equivalent +// to the following: // -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// mean_grad = decay * mean_grad + (1-decay) * gradient -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix +// with shape `[B, C]`, along the size-C dimension; +// (2) Masks out the original implicitly-zero locations; +// (3) Renormalizes the remaining elements. // -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom +// Hence, the `SparseTensor` result has exactly the same non-zero indices and +// shape. // // Arguments: -// var_: Should be from a Variable(). -// mg: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. +// sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a +// SparseTensor, in canonical ordering. 
+// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// +// Returns 1-D. The `NNZ` values for the result `SparseTensor`. +func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSoftmax", + Input: []tf.Input{ + sp_indices, sp_values, sp_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Partitions `data` into `num_partitions` tensors using indices from `partitions`. +// +// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` +// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` +// are placed in `outputs[i]` in lexicographic order of `js`, and the first +// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. +// In detail, +// +// ```python +// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] +// +// outputs[i] = pack([data[js, ...] for js if partitions[js] == i]) +// ``` +// +// `data.shape` must start with `partitions.shape`. +// +// For example: +// +// ```python +// # Scalar partitions. +// partitions = 1 +// num_partitions = 2 +// data = [10, 20] +// outputs[0] = [] # Empty with shape [0, 2] +// outputs[1] = [[10, 20]] +// +// # Vector partitions. +// partitions = [0, 0, 1, 1, 0] +// num_partitions = 2 +// data = [10, 20, 30, 40, 50] +// outputs[0] = [10, 20, 50] +// outputs[1] = [30, 40] +// ``` +// +// See `dynamic_stitch` for an example on how to merge partitions back. +// +//
+// +//
+// +// Arguments: +// +// partitions: Any shape. Indices in the range `[0, num_partitions)`. +// num_partitions: The number of partitions to output. +func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_partitions": num_partitions} + opspec := tf.OpSpec{ + Type: "DynamicPartition", + Input: []tf.Input{ + data, partitions, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("DynamicPartition", err) + return + } + return outputs +} + +// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. +type ResourceApplyAdagradAttr func(optionalAttr) + +// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the adagrad scheme. +// +// accum += grad * grad +// var -= lr * grad * (1 / sqrt(accum)) +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. 
+func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyAdagrad", + Input: []tf.Input{ + var_, accum, lr, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// CumprodAttr is an optional argument to Cumprod. +type CumprodAttr func(optionalAttr) + +// CumprodExclusive sets the optional exclusive attribute to value. +// +// value: If `True`, perform exclusive cumprod. +// If not specified, defaults to false +func CumprodExclusive(value bool) CumprodAttr { + return func(m optionalAttr) { + m["exclusive"] = value + } +} + +// CumprodReverse sets the optional reverse attribute to value. +// +// value: A `bool` (default: False). +// If not specified, defaults to false +func CumprodReverse(value bool) CumprodAttr { + return func(m optionalAttr) { + m["reverse"] = value + } +} + +// Compute the cumulative product of the tensor `x` along `axis`. +// +// By default, this op performs an inclusive cumprod, which means that the first +// element of the input is identical to the first element of the output: +// +// ```python +// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] +// ``` +// +// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is +// performed instead: +// +// ```python +// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] +// ``` +// +// By setting the `reverse` kwarg to `True`, the cumprod is performed in the +// opposite direction: +// +// ```python +// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] +// ``` +// +// This is more efficient than using separate `tf.reverse` ops. 
+// +// The `reverse` and `exclusive` kwargs can also be combined: +// +// ```python +// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] +// ``` +// +// Arguments: +// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, +// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, +// `complex128`, `qint8`, `quint8`, `qint32`, `half`. +// axis: A `Tensor` of type `int32` (default: 0). Must be in the range +// `[-rank(x), rank(x))`. +func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Cumprod", + Input: []tf.Input{ + x, axis, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the mean along segments of a tensor. +// +// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +// segments. +// +// Computes a tensor such that +// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is +// over `j` such that `segment_ids[j] == i` and `N` is the total number of +// values summed. +// +// If the mean is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// Arguments: +// +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentMean", + Input: []tf.Input{ + data, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp. +type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr) + +// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var, mg, ms, and mom tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the centered RMSProp algorithm. +// +// The centered RMSProp algorithm uses an estimate of the centered second moment +// (i.e., the variance) for normalization, as opposed to regular RMSProp, which +// uses the (uncentered) second moment. This often helps with training, but is +// slightly more expensive in terms of computation and memory. +// +// Note that in dense implementation of this algorithm, mg, ms, and mom will +// update even if the grad is zero, but in this sparse implementation, mg, ms, +// and mom will not update in iterations during which the grad is zero. 
+// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// mean_grad = decay * mean_grad + (1-decay) * gradient +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom +// +// Arguments: +// var_: Should be from a Variable(). +// mg: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. // rho: Decay rate. Must be a scalar. // // epsilon: Ridge term. Must be a scalar. @@ -7909,63 +7771,6 @@ func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, upd return scope.AddOperation(opspec) } -// StageSizeAttr is an optional argument to StageSize. -type StageSizeAttr func(optionalAttr) - -// StageSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageSizeCapacity(value int64) StageSizeAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// StageSizeMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageSizeMemoryLimit(value int64) StageSizeAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// StageSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func StageSizeContainer(value string) StageSizeAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// StageSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func StageSizeSharedName(value string) StageSizeAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op returns the number of elements in the underlying container. 
-func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StageSize", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // NonMaxSuppressionAttr is an optional argument to NonMaxSuppression. type NonMaxSuppressionAttr func(optionalAttr) @@ -8702,121 +8507,7 @@ func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Outp Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Partitions `data` into `num_partitions` tensors using indices from `partitions`. -// -// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` -// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` -// are placed in `outputs[i]` in lexicographic order of `js`, and the first -// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. -// In detail, -// -// ```python -// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] -// -// outputs[i] = pack([data[js, ...] for js if partitions[js] == i]) -// ``` -// -// `data.shape` must start with `partitions.shape`. -// -// For example: -// -// ```python -// # Scalar partitions. -// partitions = 1 -// num_partitions = 2 -// data = [10, 20] -// outputs[0] = [] # Empty with shape [0, 2] -// outputs[1] = [[10, 20]] -// -// # Vector partitions. -// partitions = [0, 0, 1, 1, 0] -// num_partitions = 2 -// data = [10, 20, 30, 40, 50] -// outputs[0] = [10, 20, 50] -// outputs[1] = [30, 40] -// ``` -// -// See `dynamic_stitch` for an example on how to merge partitions back. -// -//
-// -//
-// -// Arguments: -// -// partitions: Any shape. Indices in the range `[0, num_partitions)`. -// num_partitions: The number of partitions to output. -func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_partitions": num_partitions} - opspec := tf.OpSpec{ - Type: "DynamicPartition", - Input: []tf.Input{ - data, partitions, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("DynamicPartition", err) - return - } - return outputs -} - -// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. -type ResourceApplyAdagradAttr func(optionalAttr) - -// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the adagrad scheme. -// -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. 
-func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdagrad", - Input: []tf.Input{ - var_, accum, lr, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) + return op.Output(0) } // Returns element-wise remainder of division. This emulates C semantics in that @@ -9482,83 +9173,335 @@ func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ... Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1) +} + +// MaxPool3DAttr is an optional argument to MaxPool3D. +type MaxPool3DAttr func(optionalAttr) + +// MaxPool3DDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func MaxPool3DDataFormat(value string) MaxPool3DAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Performs 3D max pooling on the input. +// +// Arguments: +// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +// +// Returns The max pooled output tensor. 
+func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPool3D", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the gradients of 3-D convolution with respect to the input. +// +// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2 +// +// Arguments: +// input: Shape `[batch, depth, rows, cols, in_channels]`. +// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. +// `in_channels` must match between `input` and `filter`. +// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, +// out_channels]`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + opspec := tf.OpSpec{ + Type: "Conv3DBackpropInput", + Input: []tf.Input{ + input, filter, out_backprop, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad. +type ResourceApplyProximalAdagradAttr func(optionalAttr) + +// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. +// +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. 
+// If not specified, defaults to false +func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate. +// +// accum += grad * grad +// prox_v = var - lr * grad * (1 / sqrt(accum)) +// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. +func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyProximalAdagrad", + Input: []tf.Input{ + var_, accum, lr, l1, l2, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2. +type MutableHashTableOfTensorsV2Attr func(optionalAttr) + +// MutableHashTableOfTensorsV2Container sets the optional container attribute to value. +// +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this table is shared under the given name across +// multiple sessions. 
+// If not specified, defaults to "" +func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// If not specified, defaults to false +func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value + } +} + +// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value. +// If not specified, defaults to <> +func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["value_shape"] = value + } +} + +// Creates an empty hash table. +// +// This op creates a mutable hash table, specifying the type of its keys and +// values. Each value must be a vector. Data can be inserted into the table using +// the insert operations. It does not support the initialization operation. +// +// Arguments: +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. +// +// Returns Handle to a table. +func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MutableHashTableOfTensorsV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// HashTableV2Attr is an optional argument to HashTableV2. +type HashTableV2Attr func(optionalAttr) + +// HashTableV2Container sets the optional container attribute to value. +// +// value: If non-empty, this table is placed in the given container. 
+// Otherwise, a default container is used. +// If not specified, defaults to "" +func HashTableV2Container(value string) HashTableV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// HashTableV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func HashTableV2SharedName(value string) HashTableV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// +// value: If true and shared_name is empty, the table is shared +// using the node name. +// If not specified, defaults to false +func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value + } +} + +// Creates a non-initialized hash table. +// +// This op creates a hash table, specifying the type of its keys and values. +// Before using the table you will have to initialize it. After initialization the +// table will be immutable. +// +// Arguments: +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. +// +// Returns Handle to a table. +func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "HashTableV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) } -// MaxPool3DAttr is an optional argument to MaxPool3D. -type MaxPool3DAttr func(optionalAttr) +// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. 
+type MapUnstageNoKeyAttr func(optionalAttr) -// MaxPool3DDataFormat sets the optional data_format attribute to value. +// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DDataFormat(value string) MaxPool3DAttr { +// REQUIRES: value >= 0 +func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { return func(m optionalAttr) { - m["data_format"] = value + m["capacity"] = value } } -// Performs 3D max pooling on the input. +// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Arguments: -// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. +// REQUIRES: value >= 0 +func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// MapUnstageNoKeyContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. 
+// If not specified, defaults to "" +func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes and returns a random (key, value) // -// Returns The max pooled output tensor. -func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) { +// from the underlying container. If the underlying container +// does not contain elements, the op will block until it does. +func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPool3D", + Type: "MapUnstageNoKey", Input: []tf.Input{ - input, + indices, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradients of 3-D convolution with respect to the input. -// -// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2 -// -// Arguments: -// input: Shape `[batch, depth, rows, cols, in_channels]`. -// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. -// `in_channels` must match between `input` and `filter`. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. 
-func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "Conv3DBackpropInput", - Input: []tf.Input{ - input, filter, out_backprop, - }, - Attrs: attrs, + var idx int + var err error + key = op.Output(idx) + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("MapUnstageNoKey", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + return key, values } // Inverse 2D fast Fourier transform. @@ -12257,6 +12200,63 @@ func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.D return op.Output(0) } +// StageSizeAttr is an optional argument to StageSize. +type StageSizeAttr func(optionalAttr) + +// StageSizeCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func StageSizeCapacity(value int64) StageSizeAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// StageSizeMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func StageSizeMemoryLimit(value int64) StageSizeAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// StageSizeContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func StageSizeContainer(value string) StageSizeAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// StageSizeSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func StageSizeSharedName(value string) StageSizeAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op returns the number of elements in the underlying container. 
+func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StageSize", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Produces the max pool of the input tensor for quantized types. // // Arguments: @@ -12999,6 +12999,56 @@ func Neg(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } +// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. +type FakeQuantWithMinMaxVarsAttr func(optionalAttr) + +// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. +// If not specified, defaults to 8 +func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { + return func(m optionalAttr) { + m["num_bits"] = value + } +} + +// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. +// If not specified, defaults to false +func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { + return func(m optionalAttr) { + m["narrow_range"] = value + } +} + +// Fake-quantize the 'inputs' tensor of type float via global float scalars `min` +// +// and `max` to 'outputs' tensor of same shape as `inputs`. +// +// `[min; max]` define the clamping range for the `inputs` data. +// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` +// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and +// then de-quantized and output as floats in `[min; max]` interval. +// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. +// +// This operation has a gradient and thus allows for training `min` and `max` +// values. 
+func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FakeQuantWithMinMaxVars", + Input: []tf.Input{ + inputs, min, max, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Writes a `Summary` protocol buffer with a histogram. // // The generated @@ -28230,53 +28280,3 @@ func QuantizedInstanceNorm(scope *Scope, x tf.Output, x_min tf.Output, x_max tf. op := scope.AddOperation(opspec) return op.Output(0), op.Output(1), op.Output(2) } - -// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. -type FakeQuantWithMinMaxVarsAttr func(optionalAttr) - -// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Fake-quantize the 'inputs' tensor of type float via global float scalars `min` -// -// and `max` to 'outputs' tensor of same shape as `inputs`. -// -// `[min; max]` define the clamping range for the `inputs` data. -// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` -// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and -// then de-quantized and output as floats in `[min; max]` interval. -// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. 
-// -// This operation has a gradient and thus allows for training `min` and `max` -// values. -func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVars", - Input: []tf.Input{ - inputs, min, max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} -- GitLab From 5110763dc8e71ca5331144e6a837d0f3886bcbd9 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Fri, 2 Mar 2018 00:34:36 +0800 Subject: [PATCH 196/311] Fix some minor typos in get started docs to keep consistent (#17357) --- tensorflow/docs_src/get_started/checkpoints.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/get_started/checkpoints.md b/tensorflow/docs_src/get_started/checkpoints.md index dfa2110e69..4aa07c7f2a 100644 --- a/tensorflow/docs_src/get_started/checkpoints.md +++ b/tensorflow/docs_src/get_started/checkpoints.md @@ -154,7 +154,7 @@ classifier = tf.estimator.DNNClassifier( The first time you call an Estimator's `train` method, TensorFlow saves a checkpoint to the `model_dir`. Each subsequent call to the Estimator's -`train`, `eval`, or `predict` method causes the following: +`train`, `evaluate`, or `predict` method causes the following: 1. The Estimator builds the model's [graph](https://developers.google.com/machine-learning/glossary/#graph) @@ -222,7 +222,7 @@ does not match the shape stored in checkpoint: [20] To run experiments in which you train and compare slightly different versions of a model, save a copy of the code that created each -`model-dir`, possibly by creating a separate git branch for each version. +`model_dir`, possibly by creating a separate git branch for each version. This separation will keep your checkpoints recoverable. 
## Summary -- GitLab From 873768ca8e9eebb1e0985b6fd4fe8d56ad2389ff Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Thu, 1 Mar 2018 08:41:55 -0800 Subject: [PATCH 197/311] Fix link text PiperOrigin-RevId: 187483166 --- tensorflow/docs_src/performance/quantization.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md index 63448c2ebe..411889cb1c 100644 --- a/tensorflow/docs_src/performance/quantization.md +++ b/tensorflow/docs_src/performance/quantization.md @@ -80,8 +80,8 @@ need for a separate calibration step. TensorFlow can train models with quantization in the loop. Because training requires small gradient adjustments, floating point values are still used. To keep models as floating point while adding the quantization error in the training -loop, @{$array_ops#Fake_quantization} nodes simulate the effect of quantization -in the forward and backward passes. +loop, @{$array_ops#Fake_quantization$fake quantization} nodes simulate the +effect of quantization in the forward and backward passes. Since it's difficult to add these fake quantization operations to all the required locations in the model, there's a function available that rewrites the -- GitLab From 88a13b85c9559e1a14e25f36c26fb4f95fd63dde Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 1 Mar 2018 08:44:45 -0800 Subject: [PATCH 198/311] [XLA] Fix signatures of c_foo functions and add c_any_of. Embarrassingly, we were often copying the container in c_foo. Oops. This fixes that, and also adds some perfect forwarding that was missing. It also adds a c_any_of function. 
PiperOrigin-RevId: 187483435 --- tensorflow/compiler/xla/util.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h index e14c8cefa1..82e5a59da0 100644 --- a/tensorflow/compiler/xla/util.h +++ b/tensorflow/compiler/xla/util.h @@ -427,30 +427,37 @@ std::vector> CommonFactors( string SanitizeFileName(string file_name); template -bool c_all_of(Container container, Predicate&& predicate) { +bool c_all_of(const Container& container, Predicate&& predicate) { return std::all_of(std::begin(container), std::end(container), std::forward(predicate)); } +template +bool c_any_of(const Container& container, Predicate&& predicate) { + return std::any_of(std::begin(container), std::end(container), + std::forward(predicate)); +} + template -OutputIterator c_transform(InputContainer input_container, +OutputIterator c_transform(const InputContainer& input_container, OutputIterator output_iterator, - UnaryOperation unary_op) { + UnaryOperation&& unary_op) { return std::transform(std::begin(input_container), std::end(input_container), - output_iterator, unary_op); + output_iterator, + std::forward(unary_op)); } template -OutputIterator c_copy_if(InputContainer input_container, +OutputIterator c_copy_if(const InputContainer& input_container, OutputIterator output_iterator, - UnaryPredicate predicate) { + UnaryPredicate&& predicate) { return std::copy_if(std::begin(input_container), std::end(input_container), - output_iterator, predicate); + output_iterator, std::forward(predicate)); } template -OutputIterator c_copy(InputContainer input_container, +OutputIterator c_copy(const InputContainer& input_container, OutputIterator output_iterator) { return std::copy(std::begin(input_container), std::end(input_container), output_iterator); @@ -468,7 +475,7 @@ void c_sort(InputContainer& input_container, Comparator&& comparator) { } template -bool c_binary_search(Sequence& sequence, T&& value) 
{ +bool c_binary_search(const Sequence& sequence, T&& value) { return std::binary_search(std::begin(sequence), std::end(sequence), std::forward(value)); } -- GitLab From c4cc731f4f92f76dfd5f09b87c9c4acbabaace46 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 08:55:41 -0800 Subject: [PATCH 199/311] Fix TF doc style. PiperOrigin-RevId: 187484534 --- tensorflow/docs_src/community/roadmap.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/docs_src/community/roadmap.md b/tensorflow/docs_src/community/roadmap.md index 1f934acab6..a3170a10f2 100644 --- a/tensorflow/docs_src/community/roadmap.md +++ b/tensorflow/docs_src/community/roadmap.md @@ -75,8 +75,7 @@ across image recognition, speech, object detection, and ### Community and Partner Engagement #### Special Interest Groups: * Mobilizing the community to work together in focused domains -* [tf-distribute](https://groups.google.com/a/tensorflow.org/forum/#!forum/tf-distribute) -: build and packaging of TensorFlow +* [tf-distribute](https://groups.google.com/a/tensorflow.org/forum/#!forum/tf-distribute): build and packaging of TensorFlow * More to be identified and launched #### Community: -- GitLab From 03de984caa1f1403d4417357b67e96dfb7edbc3e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 09:10:17 -0800 Subject: [PATCH 200/311] Correct struct array initialization syntax. 
PiperOrigin-RevId: 187486332 --- tensorflow/python/eager/pywrap_tensor.cc | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 3ec2109d32..d3aaede749 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -520,16 +520,11 @@ PyTypeObject* EagerTensorType = nullptr; #if PY_MAJOR_VERSION >= 3 static PyType_Slot EagerTensor_Type_slots[] = { - Py_tp_dealloc, - reinterpret_cast(EagerTensor_dealloc), - Py_tp_methods, - reinterpret_cast(EagerTensor_methods), - Py_tp_getset, - reinterpret_cast(EagerTensor_getseters), - Py_tp_init, - reinterpret_cast(EagerTensor_init), - 0, - nullptr, + {Py_tp_dealloc, reinterpret_cast(EagerTensor_dealloc)}, + {Py_tp_methods, reinterpret_cast(EagerTensor_methods)}, + {Py_tp_getset, reinterpret_cast(EagerTensor_getseters)}, + {Py_tp_init, reinterpret_cast(EagerTensor_init)}, + {0, nullptr}, }; PyType_Spec EagerTensor_Type_spec = {"EagerTensor", sizeof(EagerTensor), 0, -- GitLab From c65343d282cdf5ccf4f7d3229f6c492fec344f8d Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 1 Mar 2018 09:27:57 -0800 Subject: [PATCH 201/311] Keep track of eager op device for tensor handles. Force-colocates ops using resources with the resources. 
PiperOrigin-RevId: 187488175 --- tensorflow/c/eager/c_api.cc | 37 +++++++++++++++++++++------ tensorflow/c/eager/c_api_internal.h | 10 ++++++-- tensorflow/python/eager/core_test.py | 14 ++++++++++ tensorflow/python/lib/core/py_func.cc | 5 ++-- 4 files changed, 54 insertions(+), 12 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 29c709b06d..252ceab54a 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -159,7 +159,7 @@ TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { tensorflow::Tensor tensor; status->status = tensorflow::TF_TensorToTensor(t, &tensor); if (!status->status.ok()) return nullptr; - return new TFE_TensorHandle(tensor, nullptr); + return new TFE_TensorHandle(tensor, nullptr, nullptr); } void TFE_DeleteTensorHandle(TFE_TensorHandle* h) { delete h; } @@ -222,7 +222,8 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, // has device type XLA_CPU, and the other CPU. const bool both_on_cpu = src_cpu && dst_cpu; if (is_same_device || both_on_cpu) { - return new TFE_TensorHandle(h->t, dst_cpu ? nullptr : dstd); + dstd = dst_cpu ? nullptr : dstd; + return new TFE_TensorHandle(h->t, dstd, dstd); } tensorflow::Tensor* src = &(h->t); if (!dst_cpu && (src->dtype() != tensorflow::DT_VARIANT && @@ -241,7 +242,8 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, } tensorflow::Tensor dst(dstd->GetAllocator(attr), src->dtype(), src->shape()); if (src->shape().num_elements() == 0) { - return new TFE_TensorHandle(dst, dst_cpu ? nullptr : dstd); + dstd = dst_cpu ? nullptr : dstd; + return new TFE_TensorHandle(dst, dstd, dstd); } tensorflow::DeviceContext* src_device_context = nullptr; if (!src_cpu) { @@ -269,7 +271,8 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, }); n.WaitForNotification(); return (TF_GetCode(status) == TF_OK) - ? new TFE_TensorHandle(dst, dst_cpu ? nullptr : dstd) + ? new TFE_TensorHandle(dst, dst_cpu ? 
nullptr : dstd, + dst_cpu ? nullptr : dstd) : nullptr; } @@ -325,6 +328,7 @@ void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { if (!status->status.ok()) return; op->inputs.push_back(h->t); op->input_devices.push_back(h->d); + op->input_op_devices.push_back(h->op_device); op->attrs.NumInputs(op->inputs.size()); } @@ -540,7 +544,8 @@ tensorflow::Status ValidateInputTypeAndPlacement( } // We are only here if the policy is warn or silent copies, so we should // trigger a copy. - TFE_TensorHandle original{op->inputs[i], op->input_devices[i]}; + TFE_TensorHandle original{op->inputs[i], op->input_devices[i], + op->device}; TF_Status* s = TF_NewStatus(); TFE_TensorHandle* copied_tensor = TFE_TensorHandleCopyToDevice( &original, ctx, expected_device->name().c_str(), s); @@ -744,6 +749,7 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { // via `op_input_to_func_input`, adjust the actual inputs accordingly. launch_op->inputs = op->inputs; launch_op->input_devices = op->input_devices; + launch_op->input_op_devices = op->input_op_devices; if (!op_input_to_func_input.empty()) { DCHECK_EQ(op->inputs.size(), op_input_to_func_input.size()); if (!op->input_devices.empty()) { @@ -832,9 +838,24 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, op = xla_launch_op.get(); } #endif // TENSORFLOW_EAGER_USE_XLA - TFE_Context* ctx = op->ctx; tensorflow::Device* device = op->device; + // Ensure all resource-touching ops run in the device the resource is, + // regardless of anything else that has been specified. This is identical to + // the graph mode behavior. + for (int i = 0; i < op->inputs.size(); ++i) { + if (op->inputs[i].dtype() == tensorflow::DT_RESOURCE && + op->input_op_devices[i] != device) { + tensorflow::Device* d = op->input_op_devices[i] == nullptr + ? 
ctx->devices()[0] + : op->input_op_devices[i]; + VLOG(1) << "Changing device of operation " << op->name << " to " + << d->name() << " because input #" << i + << " is a resource in this device."; + device = d; + op->device = d; + } + } if (!ctx->soft_placement && device == nullptr) { // TODO(ashankar): ASSUMPTION: ctx->devices()[0] is always CPU device = ctx->devices()[0]; @@ -968,7 +989,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, (*output_memory_types)[i] == tensorflow::HOST_MEMORY) { d = nullptr; } - retvals[i] = new TFE_TensorHandle(outputs[i], d); + retvals[i] = new TFE_TensorHandle(outputs[i], d, device); } } @@ -994,7 +1015,7 @@ void TFE_ContextAddFunction(TFE_Context* ctx, TF_Function* function, } // extern "C" TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t) { - return new TFE_TensorHandle(t, nullptr); + return new TFE_TensorHandle(t, nullptr, nullptr); } const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory( diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 53c21b64cb..145e4c95cf 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -101,8 +101,9 @@ struct TFE_Context { }; struct TFE_TensorHandle { - TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d) - : t(t), d(d) {} + TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d, + tensorflow::Device* op_device) + : t(t), d(d), op_device(op_device) {} tensorflow::Tensor t; // TODO(ashankar): d == nullptr iff local CPU @@ -114,6 +115,10 @@ struct TFE_TensorHandle { // TODO(ashankar): Reference count TFE_Context to ensure that 'd' of a // TFE_TensorHandle does not outlive the TFE_Context from which it came? tensorflow::Device* d; + + // Device in which the op producing this tensor was executed. Equals to d for + // constant tensors. 
+ tensorflow::Device* op_device; }; struct TFE_Op { @@ -130,6 +135,7 @@ struct TFE_Op { const tensorflow::AttrTypeMap* attr_types; std::vector inputs; std::vector input_devices; + std::vector input_op_devices; tensorflow::Device* device; bool use_xla = false; }; diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index 0e40d8a5c0..e418be5fae 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -34,7 +34,9 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import resource_variable_ops def execute(op_name, num_outputs, inputs, attrs=None): @@ -181,6 +183,18 @@ class TFETest(test_util.TensorFlowTestCase): attrs=('T', x.dtype.as_datatype_enum))[0].cpu().numpy() self.assertEqual(3, result) + def testResourceTensorPlacement(self): + if not context.context().num_gpus(): + self.skipTest('No GPUs found') + + with context.device('gpu:0'): + v = resource_variable_ops.ResourceVariable(1.0) + with context.device('cpu:0'): + # Check that even though we specified the cpu device we'll run the read op + # in the device where the handle is. 
+ self.assertAllEqual( + gen_resource_variable_ops.read_variable_op(v.handle, v.dtype), 1.0) + def testCopyBetweenDevices(self): if not context.context().num_gpus(): self.skipTest('No GPUs found') diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index e0422ef80a..343415b264 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -79,10 +79,11 @@ Status MakeArgTuple(const PyCall* call, PyObject** tuple) { const Tensor& t = call->ins[i]; if (call->eager) { if (call->gpu) { - arg = EagerTensorFromHandle(new TFE_TensorHandle(t, call->device)); + arg = EagerTensorFromHandle( + new TFE_TensorHandle(t, call->device, call->device)); } else { // TFE_TensorHandle assumes that CPU is identified by `nullptr`. - arg = EagerTensorFromHandle(new TFE_TensorHandle(t, nullptr)); + arg = EagerTensorFromHandle(new TFE_TensorHandle(t, nullptr, nullptr)); } if (arg == nullptr) { return errors::Internal("Unable to procure EagerTensor from Tensor."); -- GitLab From 2c4eca575e1fc36c7b2f1d1c312426ff4c4cec16 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 1 Mar 2018 09:31:20 -0800 Subject: [PATCH 202/311] [XLA] Don't dump the "contents" of constants with a zero-sized dimension in the HLO graph dumper. Previously we'd dump e.g. "{ {}, {}, ... }" for an f32[100, 0], which is just noise. 
PiperOrigin-RevId: 187488625 --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 2861fec39e..99c4932a38 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -782,6 +782,14 @@ string HloDotDumper::GetInstructionNodeInlinedOperands( auto stringify_constant = [](const HloInstruction* constant) { const auto& shape = constant->shape(); + // If the shape has a dimension of size zero, print it as e.g. + // "{} (f32[42, 0, 10])". The alternative, calling Literal::ToString(), + // enumerates all of its empty dimensions (e.g. "{ { {}, {} }, ..."), which + // is just noise. + if (ShapeUtil::HasZeroElements(shape)) { + return Printf("{} (%s)", ShapeUtil::HumanString(constant->shape())); + } + // Print the literal value of constants with <= K elements. optional elem_count; if (!ShapeUtil::IsOpaque(shape) && !ShapeUtil::IsTuple(shape)) { -- GitLab From 7129d6a0746d0798e0a3015f645697b0fee12c37 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 09:52:00 -0800 Subject: [PATCH 203/311] Fixed tf.reduce_sum usage on 2-D tensors. 
PiperOrigin-RevId: 187491311 --- .../resolve_constant_unary.cc | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc index f227554bc5..d96b3d522d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc @@ -138,12 +138,32 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { memcpy(output_float_data.data(), (*input_float_data).data(), output_buffer_size * sizeof(output_float_data[0])); } else if (unary_op->type == OperatorType::kTensorFlowSum) { - // At the moment only full reduction across all dimensions is supported. - float sum = 0.f; - for (int i = 0; i < input_buffer_size; i++) { - sum += (*input_float_data)[i]; + CHECK_EQ(unary_op->inputs.size(), 2) << "Sum needs 2 inputs"; + if (!IsConstantParameterArray(*model, unary_op->inputs[1])) { + AddMessageF("Axis input is non-constant"); + return false; } - for (int i = 0; i < output_buffer_size; ++i) { + auto& axis_array = model->GetArray(unary_op->inputs[1]); + CHECK(axis_array.data_type == ArrayDataType::kInt32); + int axis = axis_array.GetBuffer().data[0]; + CHECK_LT(axis, input_shape.dimensions_count()) << "Axis out of bounds"; + + // We currently only handle reduction on axis 0. + CHECK_EQ(axis, 0) << "Only reduction along axis 0 is supported"; + // We currently only handle 1-D and 2-D input tensors. + CHECK_LE(input_shape.dimensions_count(), 2) << "Rank >2 not yet supported"; + // We only support keep_dims=true; shape prop will need to change otherwise. 
+ auto sum_op = static_cast(unary_op); + CHECK(sum_op->keep_dims) << "Only keep_dims=true is supported"; + + std::vector indices(input_shape.dimensions_count()); + for (int i = 0; i < input_shape.dims(1); ++i) { + indices[1] = i; + float sum = 0.f; + for (int j = 0; j < input_shape.dims(0); ++j) { + indices[0] = j; + sum += (*input_float_data)[Offset(input_shape, indices)]; + } output_float_data[i] = sum; } } else if (unary_op->type == OperatorType::kTensorFlowMin) { -- GitLab From 02b5fe290aea0e3cb8680d9e484f2b485bc92042 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Fri, 2 Mar 2018 01:58:06 +0800 Subject: [PATCH 204/311] Fix the error activation function link in custom_estimators --- tensorflow/docs_src/get_started/custom_estimators.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/get_started/custom_estimators.md index 42a246678a..ae89b639b4 100644 --- a/tensorflow/docs_src/get_started/custom_estimators.md +++ b/tensorflow/docs_src/get_started/custom_estimators.md @@ -213,7 +213,7 @@ is connected to every node in the preceding layer. Here's the relevant code: ``` * The `units` parameter defines the number of output neurons in a given layer. -* The `activation` parameter defines the [activation function](https://developers.google.com/machine-learning/glossary/#a) — +* The `activation` parameter defines the [activation function](https://developers.google.com/machine-learning/glossary/#activation_function) — [Relu](https://developers.google.com/machine-learning/glossary/#ReLU) in this case. -- GitLab From 0265b5e632b35c2a5dff30e72e06aa5229bf0d45 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Thu, 1 Mar 2018 10:23:57 -0800 Subject: [PATCH 205/311] [XLA] Update operation semantics doc for BatchNorm operations - Update formulas for BatchNormGrad. 
The rendered version of the new formulas can be found here: https://latexbase.com/d/1ad54ff9-f9d5-4479-beef-156ea26a0632 - Update output table to include the symbol name for each output. - Fix a typo in BatchNormGrad's input table to correctly display the symbol `beta`. PiperOrigin-RevId: 187496086 --- .../performance/xla/operation_semantics.md | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index eaf6aeba3d..8162382846 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -45,27 +45,30 @@ feature dimension in `operand`), the operation calculates the gradients with respect to `operand`, `offset` and `scale` across all the other dimensions. The `feature_index` must be a valid index for the feature dimension in `operand`. -The three gradients are defined by the following formulas: +The three gradients are defined by the following formulas (Assuming a +4-dimensional tensor as `operand` and (l) is the index for feature dimension): -\\( \nabla x = \nabla y * \gamma * \sqrt{\sigma^2+\epsilon} \\) +\\( coef_l = \frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (\nabla y_{ijkl} * (x_{ijkl} - \mu_l) / (\sigma^2_{l}+\epsilon)) \\) -\\( \nabla \gamma = sum(\nabla y * (x - \mu) * \sqrt{\sigma^2 + \epsilon}) \\) +\\( \nabla x_{ijkl} = \gamma_{l} * (1/\sqrt{\sigma^2_{l}+\epsilon}) * [\nabla y_{ijkl} - mean(\nabla y) - (x_{ijkl} - \mu_{l}) * coef_l] \\) -\\( \nabla \beta = sum(\nabla y) \\) +\\( \nabla \beta_l = \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl} \\) + +\\( \nabla \gamma_l = \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl} * ((x_{ijkl} - \mu_l) / \sqrt{\sigma^2_{l}+\epsilon}) \\) The inputs `mean` and `variance` represents moments value across batch and spatial dimensions.
The output type is a tuple of three handles: -|Outputs | Type | Semantics | -|------------- | ----------------------- | ------------------------------------| -|`grad_operand`| `ComputationDataHandle` | gradient with respect to input | -: : : `operand` : -|`grad_scale` | `ComputationDataHandle` | gradient with respect to input | -: : : `scale` : -|`grad_offset` | `ComputationDataHandle` | gradient with respect to input | -: : : `offset` : +|Outputs | Type | Semantics | +|------------- | ----------------------- | ------------------------------------ | +|`grad_operand`| `ComputationDataHandle` | gradient with respect to input | +: : : `operand` (\\( \nabla x\\)) : +|`grad_scale` | `ComputationDataHandle` | gradient with respect to input | +: : : `scale` (\\( \nabla \gamma\\)) : +|`grad_offset` | `ComputationDataHandle` | gradient with respect to input | +: : : `offset`(\\( \nabla \beta\\)) : ## BatchNormInference @@ -119,11 +122,11 @@ Normalizes an array across batch and spatial dimensions. 
| Arguments | Type | Semantics | | --------------- | ----------------------- | -------------------------------- | | `operand` | `ComputationDataHandle` | n dimensional array to be | -: : : normalized : +: : : normalized (x) : | `scale` | `ComputationDataHandle` | 1 dimensional array | : : : (\\(\gamma\\)) : | `offset` | `ComputationDataHandle` | 1 dimensional array | -: : : (\\(\beta\\ ) : +: : : (\\(\beta\\)) : | `epsilon` | `float` | Epsilon value (\\(\epsilon\\)) | | `feature_index` | `int64` | Index to feature dimension | : : : in `operand` : -- GitLab From 16f7cb272f4810cb09f8238ba6b87f5945cd2b03 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Thu, 1 Mar 2018 10:29:11 -0800 Subject: [PATCH 206/311] Fix improper comments such as tf --> TensorFlow --- tensorflow/core/kernels/mkl_fused_batch_norm_op.cc | 4 ++-- tensorflow/core/kernels/mkl_relu_op.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index b7dee3fb3e..eccdece5e3 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -1249,8 +1249,8 @@ class MklFusedBatchNormGradOp : public OpKernel { tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); - // both src and diff_dst are tf layout, - // so get tf shape from anyont should be ok + // both src and diff_dst are TensorFlow layout, + // so it is OK to get TensorFlow shape. 
tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, kDiffSrcIndex, &diff_src_tensor, diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 924b9da7e0..6c873af566 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -600,8 +600,8 @@ class MklReluGradOpBase : public OpKernel { tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); - // both src and diff_dst are tf layout, - // so get tf shape from anyone should be ok + // both src and diff_dst are TensorFlow layout, + // so it is ok to get TensorFlow shape. tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, -- GitLab From ce8783a0d535b4657ecaab8e621ab7de568b80d6 Mon Sep 17 00:00:00 2001 From: Vijay Vasudevan Date: Thu, 1 Mar 2018 10:37:45 -0800 Subject: [PATCH 207/311] Remove old note that no longer applies. PiperOrigin-RevId: 187498339 --- tensorflow/core/BUILD | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 08832b58da..3271825251 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2224,8 +2224,6 @@ tf_cuda_library( alwayslink = 1, ) -# This library is deprecated and no longer publicly available. -# Do not add more uses of it. cc_library( name = "regexp_internal", hdrs = [ -- GitLab From 006d228201a1e9e140aa0651a59c51d3396a2d12 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Thu, 1 Mar 2018 10:38:27 -0800 Subject: [PATCH 208/311] Fixed the typo in RunConfig pydoc. 
PiperOrigin-RevId: 187498424 --- tensorflow/python/estimator/run_config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index 3e021242c4..62f035bce5 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -345,7 +345,7 @@ class RunConfig(object): os.environ['TF_CONFIG'] = json.dumps( {'cluster': cluster, 'task': {'type': 'worker', 'index': 1}}) - config = ClusterConfig() + config = RunConfig() assert config.master == 'host4:2222' assert config.task_id == 1 assert config.num_ps_replicas == 2 @@ -363,7 +363,7 @@ class RunConfig(object): os.environ['TF_CONFIG'] = json.dumps( {'cluster': cluster, 'task': {'type': 'chief', 'index': 0}}) - config = ClusterConfig() + config = RunConfig() assert config.master == 'host0:2222' assert config.task_id == 0 assert config.num_ps_replicas == 2 @@ -381,7 +381,7 @@ class RunConfig(object): os.environ['TF_CONFIG'] = json.dumps( {'cluster': cluster, 'task': {'type': 'evaluator', 'index': 0}}) - config = ClusterConfig() + config = RunConfig() assert config.master == '' assert config.evaluator_master == '' assert config.task_id == 0 -- GitLab From 12bd86fb45d1b5981896ea7500a465cc017c3ab8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 11:16:18 -0800 Subject: [PATCH 209/311] Internal cleanup. 
PiperOrigin-RevId: 187504966 --- .../java/org/tensorflow/lite/Interpreter.java | 6 + .../lite/NativeInterpreterWrapper.java | 25 +++- .../native/nativeinterpreterwrapper_jni.cc | 107 ++++++++++++++---- .../native/nativeinterpreterwrapper_jni.h | 11 +- .../lite/NativeInterpreterWrapperTest.java | 24 ++++ 5 files changed, 140 insertions(+), 33 deletions(-) diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index dd883d69d2..9286814b74 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -80,6 +80,9 @@ public final class Interpreter implements AutoCloseable { /** * Runs model inference if the model takes only one input, and provides only one output. * + *

Warning: The API runs much faster if {@link ByteBuffer} is used as input data type. Please + * consider using {@link ByteBuffer} to feed input data for better performance. + * * @param input an array or multidimensional array, or a {@link ByteBuffer} of primitive types * including int, float, long, and byte. {@link ByteBuffer} is the preferred way to pass large * input data. When {@link ByteBuffer} is used, its content should remain unchanged until @@ -96,6 +99,9 @@ public final class Interpreter implements AutoCloseable { /** * Runs model inference if the model takes multiple inputs, or returns multiple outputs. * + *

Warning: The API runs much faster if {@link ByteBuffer} is used as input data type. Please + * consider using {@link ByteBuffer} to feed input data for better performance. + * * @param inputs an array of input data. The inputs should be in the same order as inputs of the * model. Each input can be an array or multidimensional array, or a {@link ByteBuffer} of * primitive types including int, float, long, and byte. {@link ByteBuffer} is the preferred diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java index 7612be0ddd..bca4a3cae6 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java @@ -35,6 +35,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); modelHandle = createModel(modelPath, errorHandle); interpreterHandle = createInterpreter(modelHandle, errorHandle); + isMemoryAllocated = true; } /** @@ -47,6 +48,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); modelHandle = createModelWithBuffer(modelByteBuffer, errorHandle); interpreterHandle = createInterpreter(modelHandle, errorHandle); + isMemoryAllocated = true; } /** Releases resources associated with this {@code NativeInterpreterWrapper}. */ @@ -59,6 +61,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { modelByteBuffer = null; inputsIndexes = null; outputsIndexes = null; + isMemoryAllocated = false; } /** Sets inputs, runs model inference and returns outputs. 
*/ @@ -93,10 +96,19 @@ final class NativeInterpreterWrapper implements AutoCloseable { } inferenceDurationNanoseconds = -1; long[] outputsHandles = - run(interpreterHandle, errorHandle, sizes, dataTypes, numsOfBytes, inputs, this); + run( + interpreterHandle, + errorHandle, + sizes, + dataTypes, + numsOfBytes, + inputs, + this, + isMemoryAllocated); if (outputsHandles == null || outputsHandles.length == 0) { throw new IllegalStateException("Interpreter has no outputs."); } + isMemoryAllocated = true; Tensor[] outputs = new Tensor[outputsHandles.length]; for (int i = 0; i < outputsHandles.length; ++i) { outputs[i] = Tensor.fromHandle(outputsHandles[i]); @@ -111,14 +123,17 @@ final class NativeInterpreterWrapper implements AutoCloseable { int[] dtypes, int[] numsOfBytes, Object[] values, - NativeInterpreterWrapper wrapper); + NativeInterpreterWrapper wrapper, + boolean memoryAllocated); /** Resizes dimensions of a specific input. */ void resizeInput(int idx, int[] dims) { - resizeInput(interpreterHandle, errorHandle, idx, dims); + if (resizeInput(interpreterHandle, errorHandle, idx, dims)) { + isMemoryAllocated = false; + } } - private static native void resizeInput( + private static native boolean resizeInput( long interpreterHandle, long errorHandle, int inputIdx, int[] dims); void setUseNNAPI(boolean useNNAPI) { @@ -264,6 +279,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { private Map outputsIndexes; + private boolean isMemoryAllocated = false; + private static native String[] getInputNames(long interpreterHandle); private static native String[] getOutputNames(long interpreterHandle); diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index e405df0745..47bf4c9c9d 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ 
b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -149,6 +149,45 @@ TfLiteStatus checkInputs(JNIEnv* env, tflite::Interpreter* interpreter, return kTfLiteOk; } +// Checks whether there is any difference between dimensions of a tensor and a +// given dimensions. Returns true if there is difference, else false. +bool areDimsDifferent(JNIEnv* env, TfLiteTensor* tensor, jintArray dims) { + int num_dims = static_cast(env->GetArrayLength(dims)); + jint* ptr = env->GetIntArrayElements(dims, nullptr); + if (ptr == nullptr) { + throwException(env, kIllegalArgumentException, + "Empty dimensions of input array."); + return true; + } + if (tensor->dims->size != num_dims) { + return true; + } + for (int i = 0; i < num_dims; ++i) { + if (ptr[i] != tensor->dims->data[i]) { + return true; + } + } + env->ReleaseIntArrayElements(dims, ptr, JNI_ABORT); + return false; +} + +bool areInputDimensionsTheSame(JNIEnv* env, tflite::Interpreter* interpreter, + int input_size, jobjectArray sizes) { + if (interpreter->inputs().size() != input_size) { + return false; + } + for (int i = 0; i < input_size; ++i) { + int input_idx = interpreter->inputs()[i]; + jintArray dims = + static_cast(env->GetObjectArrayElement(sizes, i)); + TfLiteTensor* target = interpreter->tensor(input_idx); + if (areDimsDifferent(env, target, dims)) return false; + env->DeleteLocalRef(dims); + if (env->ExceptionCheck()) return false; + } + return true; +} + TfLiteStatus resizeInputs(JNIEnv* env, tflite::Interpreter* interpreter, int input_size, jobjectArray sizes) { for (int i = 0; i < input_size; ++i) { @@ -344,6 +383,15 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( throwException(env, kIllegalArgumentException, "Cannot create interpreter: %s", error_reporter->CachedErrorMessage()); + return 0; + } + // allocates memory + status = interpreter->AllocateTensors(); + if (status != kTfLiteOk) { + throwException(env, kNullPointerException, + "Can not allocate memory 
for the interpreter", + error_reporter->CachedErrorMessage()); + return 0; } return reinterpret_cast(interpreter.release()); } @@ -353,7 +401,7 @@ JNIEXPORT jlongArray JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_run( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jobjectArray sizes, jintArray data_types, jintArray nums_of_bytes, - jobjectArray values, jobject wrapper) { + jobjectArray values, jobject wrapper, jboolean memory_allocated) { tflite::Interpreter* interpreter = convertLongToInterpreter(env, interpreter_handle); if (interpreter == nullptr) return nullptr; @@ -365,20 +413,23 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( TfLiteStatus status = checkInputs(env, interpreter, input_size, data_types, nums_of_bytes, values, sizes); if (status != kTfLiteOk) return nullptr; - // resizes inputs - status = resizeInputs(env, interpreter, input_size, sizes); - if (status != kTfLiteOk) { - throwException(env, kNullPointerException, "Can not resize the input: %s", - error_reporter->CachedErrorMessage()); - return nullptr; - } - // allocates memory - status = interpreter->AllocateTensors(); - if (status != kTfLiteOk) { - throwException(env, kNullPointerException, - "Can not allocate memory for the given inputs: %s", - error_reporter->CachedErrorMessage()); - return nullptr; + if (!memory_allocated || + !areInputDimensionsTheSame(env, interpreter, input_size, sizes)) { + // resizes inputs + status = resizeInputs(env, interpreter, input_size, sizes); + if (status != kTfLiteOk) { + throwException(env, kNullPointerException, "Can not resize the input: %s", + error_reporter->CachedErrorMessage()); + return nullptr; + } + // allocates memory + status = interpreter->AllocateTensors(); + if (status != kTfLiteOk) { + throwException(env, kNullPointerException, + "Can not allocate memory for the given inputs: %s", + error_reporter->CachedErrorMessage()); + return nullptr; + } } // sets inputs status = setInputs(env, interpreter, 
input_size, data_types, nums_of_bytes, @@ -448,29 +499,37 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputDims( return outputs; } -JNIEXPORT void JNICALL +JNIEXPORT jboolean JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_resizeInput( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jint input_idx, jintArray dims) { BufferErrorReporter* error_reporter = convertLongToErrorReporter(env, error_handle); - if (error_reporter == nullptr) return; + if (error_reporter == nullptr) return JNI_FALSE; tflite::Interpreter* interpreter = convertLongToInterpreter(env, interpreter_handle); - if (interpreter == nullptr) return; + if (interpreter == nullptr) return JNI_FALSE; const int idx = static_cast(input_idx); if (idx < 0 || idx >= interpreter->inputs().size()) { throwException(env, kIllegalArgumentException, "Can not resize %d-th input for a model having %d inputs.", idx, interpreter->inputs().size()); + return JNI_FALSE; } - TfLiteStatus status = interpreter->ResizeInputTensor( - interpreter->inputs()[idx], convertJIntArrayToVector(env, dims)); - if (status != kTfLiteOk) { - throwException(env, kIllegalArgumentException, - "Failed to resize %d-th input: %s", idx, - error_reporter->CachedErrorMessage()); + // check whether it is resizing with the same dimensions. + TfLiteTensor* target = interpreter->tensor(input_idx); + bool is_changed = areDimsDifferent(env, target, dims); + if (is_changed) { + TfLiteStatus status = interpreter->ResizeInputTensor( + interpreter->inputs()[idx], convertJIntArrayToVector(env, dims)); + if (status != kTfLiteOk) { + throwException(env, kIllegalArgumentException, + "Failed to resize %d-th input: %s", idx, + error_reporter->CachedErrorMessage()); + return JNI_FALSE; + } } + return is_changed ? 
JNI_TRUE : JNI_FALSE; } JNIEXPORT void JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_delete( diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h index 31c8f1bc88..f7c2d9bf82 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h @@ -109,13 +109,13 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: * Signature: - * (JJ[Ljava/lang/Object;[I[I[Ljava/lang/Object;Lorg/tensorflow/lite/NativeInterpreterWrapper;)[J + * (JJ[Ljava/lang/Object;[I[I[Ljava/lang/Object;Ljava/lang/Object;Z)[J */ JNIEXPORT jlongArray JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_run( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jobjectArray sizes, jintArray data_types, jintArray nums_of_bytes, - jobjectArray values, jobject wrapper); + jobjectArray values, jobject wrapper, jboolean memory_allocated); /* * Class: org_tensorflow_lite_NativeInterpreterWrapper @@ -132,11 +132,12 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputDims( /* * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: - * Signature: (JJI[I) + * Signature: (JJI[I)Z * - * It resizes dimensions of a input. + * It returns true if resizing input tensor to different dimensions, else return + * false. 
*/ -JNIEXPORT void JNICALL +JNIEXPORT jboolean JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_resizeInput( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jint input_idx, jintArray dims); diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java index 8c1f2406f7..6371fb59dc 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java @@ -94,6 +94,30 @@ public final class NativeInterpreterWrapperTest { wrapper.close(); } + @Test + public void testRunWithInputsOfSameDims() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + float[] oneD = {1.23f, -6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + Tensor[] outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + float[][][][] parsedOutputs = new float[2][8][8][3]; + outputs[0].copyTo(parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, -19.62f, 23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + parsedOutputs = new float[2][8][8][3]; + outputs[0].copyTo(parsedOutputs); + outputOneD = parsedOutputs[0][0][0]; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + wrapper.close(); + } + @Test public void testRunWithInt() { NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(INT_MODEL_PATH); -- GitLab From 34eddebe5127a984a058cb7c2b003c2fd49f5c82 Mon Sep 17 
00:00:00 2001 From: Nick Desaulniers Date: Thu, 1 Mar 2018 11:46:56 -0800 Subject: [PATCH 210/311] [XLA] Optimize away DynamicUpdateSlice with update parameter with a dimension of zero. A zero sized update has no effect. PiperOrigin-RevId: 187510099 --- .../xla/service/algebraic_simplifier.cc | 8 +++++++ .../xla/service/algebraic_simplifier_test.cc | 23 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 5ddd8ec377..ecaa474336 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -1625,6 +1625,14 @@ Status AlgebraicSimplifierVisitor::HandleDynamicUpdateSlice( if (IsAll(start_indices, 0) && SameShape(dynamic_update_slice, update)) { return ReplaceInstruction(dynamic_update_slice, update); } + + // If any dimension of update is 0, elide the DynamicUpdateSlice. This + // optimization becomes invalid should we later prefer to warn about out of + // bound indices. + if (ShapeUtil::HasZeroElements(update->shape())) { + return ReplaceInstruction(dynamic_update_slice, + dynamic_update_slice->mutable_operand(0)); + } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 667ae01993..451294ef5d 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2800,6 +2800,29 @@ DotOfConcatTestSpec kDotOfConcatTestSpecs[] = { {/*m=*/1, /*k=*/16, /*n=*/1}, // }; +// Test that DynamicUpdateSlice update param with any dimension equal to zero +// gets removed. 
+TEST_F(AlgebraicSimplifierTest, DynamicUpdateSliceZeroUpdate) { + HloComputation::Builder builder(TestName()); + const Shape dslice_shape = ShapeUtil::MakeShape(F32, {10}); + HloInstruction* const operand = builder.AddInstruction( + HloInstruction::CreateParameter(0, dslice_shape, "operand")); + const Shape update_shape = ShapeUtil::MakeShape(F32, {0}); + HloInstruction* const update = builder.AddInstruction( + HloInstruction::CreateParameter(1, update_shape, "update")); + HloInstruction* const start_indices = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR1({0}))); + builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + dslice_shape, operand, update, start_indices)); + const HloComputation* const computation = + module().AddEntryComputation(builder.Build()); + + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + ASSERT_TRUE(simplifier.Run(&module()).ValueOrDie()); + EXPECT_THAT(computation->root_instruction(), operand); +} + INSTANTIATE_TEST_CASE_P(DotOfConcatSimplificationTestInstantiation, DotOfConcatSimplificationTest, ::testing::ValuesIn(kDotOfConcatTestSpecs)); -- GitLab From f176a611605bb26b17ef16d096e66d9d9ab2bda9 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 1 Mar 2018 11:59:14 -0800 Subject: [PATCH 211/311] Refactor training part of the Keras engine. Also add support for sample/class weights with eager execution. Structure before: engine/training.py engine/training_eager.py After: engine/training.py engine/training_arrays.py engine/training_eager.py engine/training_generator.py engine/training_utils.py All new files are about 500 lines long. training.py is now 1700 lines long (about 1000 lines of logic). It was previously 3000 lines long. 
PiperOrigin-RevId: 187511923 --- tensorflow/python/keras/BUILD | 9 +- .../keras/_impl/keras/engine/training.py | 1494 +---------------- .../_impl/keras/engine/training_arrays.py | 495 ++++++ .../_impl/keras/engine/training_eager.py | 314 ++-- .../_impl/keras/engine/training_eager_test.py | 223 +++ .../_impl/keras/engine/training_generator.py | 439 +++++ .../keras/_impl/keras/engine/training_test.py | 14 +- .../_impl/keras/engine/training_utils.py | 534 ++++++ .../keras/_impl/keras/utils/__init__.py | 2 +- .../{training_utils.py => multi_gpu_utils.py} | 0 ..._utils_test.py => multi_gpu_utils_test.py} | 0 tensorflow/python/keras/utils/__init__.py | 2 +- 12 files changed, 1966 insertions(+), 1560 deletions(-) create mode 100644 tensorflow/python/keras/_impl/keras/engine/training_arrays.py create mode 100644 tensorflow/python/keras/_impl/keras/engine/training_generator.py create mode 100644 tensorflow/python/keras/_impl/keras/engine/training_utils.py rename tensorflow/python/keras/_impl/keras/utils/{training_utils.py => multi_gpu_utils.py} (100%) rename tensorflow/python/keras/_impl/keras/utils/{training_utils_test.py => multi_gpu_utils_test.py} (100%) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index a98d08f928..bd1aac5eae 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -45,7 +45,10 @@ py_library( "_impl/keras/engine/saving.py", "_impl/keras/engine/sequential.py", "_impl/keras/engine/training.py", + "_impl/keras/engine/training_arrays.py", "_impl/keras/engine/training_eager.py", + "_impl/keras/engine/training_generator.py", + "_impl/keras/engine/training_utils.py", "_impl/keras/estimator.py", "_impl/keras/initializers.py", "_impl/keras/layers/__init__.py", @@ -78,8 +81,8 @@ py_library( "_impl/keras/utils/generic_utils.py", "_impl/keras/utils/io_utils.py", "_impl/keras/utils/layer_utils.py", + "_impl/keras/utils/multi_gpu_utils.py", "_impl/keras/utils/np_utils.py", - "_impl/keras/utils/training_utils.py", 
"_impl/keras/utils/vis_utils.py", "_impl/keras/wrappers/__init__.py", "_impl/keras/wrappers/scikit_learn.py", @@ -646,9 +649,9 @@ py_test( ) py_test( - name = "training_utils_test", + name = "multi_gpu_utils_test", size = "medium", - srcs = ["_impl/keras/utils/training_utils_test.py"], + srcs = ["_impl/keras/utils/multi_gpu_utils_test.py"], srcs_version = "PY2AND3", tags = ["multi_gpu"], deps = [ diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index c121d819ff..2d040e7c0f 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -18,26 +18,21 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import copy - import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.keras._impl.keras import backend as K -from tensorflow.python.keras._impl.keras import callbacks as cbks from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module from tensorflow.python.keras._impl.keras import optimizers +from tensorflow.python.keras._impl.keras.engine import training_arrays from tensorflow.python.keras._impl.keras.engine import training_eager +from tensorflow.python.keras._impl.keras.engine import training_generator +from tensorflow.python.keras._impl.keras.engine import training_utils from tensorflow.python.keras._impl.keras.engine.base_layer import Layer from tensorflow.python.keras._impl.keras.engine.network import Network -from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer -from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer -from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence -from 
tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches -from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.layers.base import _DeferredTensor from tensorflow.python.ops import array_ops @@ -45,472 +40,6 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import optimizer as tf_optimizer_module from tensorflow.python.util.tf_export import tf_export -try: - from scipy.sparse import issparse # pylint: disable=g-import-not-at-top -except ImportError: - issparse = None - - -def _standardize_input_data(data, - names, - shapes=None, - check_batch_axis=True, - exception_prefix=''): - """Normalizes inputs and targets provided by users. - - Users may pass data as a list of arrays, dictionary of arrays, - or as a single array. We normalize this to an ordered list of - arrays (same order as `names`), while checking that the provided - arrays have shapes that match the network's expectations. - - Arguments: - data: User-provided input data (polymorphic). - names: List of expected array names. - shapes: Optional list of expected array shapes. - check_batch_axis: Boolean; whether to check that - the batch axis of the arrays matches the expected - value found in `shapes`. - exception_prefix: String prefix used for exception formatting. - - Returns: - List of standardized input arrays (one array per model input). - - Raises: - ValueError: in case of improperly formatted user-provided data. 
- """ - if not names: - if data is not None and hasattr(data, '__len__') and len(data): - raise ValueError('Error when checking model ' + exception_prefix + ': ' - 'expected no data, but got:', data) - return [] - if data is None: - return [None for _ in range(len(names))] - - if isinstance(data, dict): - try: - data = [ - data[x].values - if data[x].__class__.__name__ == 'DataFrame' else data[x] - for x in names - ] - except KeyError as e: - raise ValueError('No data provided for "' + e.args[0] + '". Need data ' - 'for each key in: ' + str(names)) - elif isinstance(data, list): - if isinstance(data[0], list): - data = [np.asarray(d) for d in data] - elif len(names) == 1 and isinstance(data[0], (float, int)): - data = [np.asarray(data)] - else: - data = [ - x.values if x.__class__.__name__ == 'DataFrame' else x for x in data - ] - else: - data = data.values if data.__class__.__name__ == 'DataFrame' else data - data = [data] - data = [ - np.expand_dims(x, 1) if x is not None and x.ndim == 1 else x for x in data - ] - - if len(data) != len(names): - if data and hasattr(data[0], 'shape'): - raise ValueError('Error when checking model ' + exception_prefix + - ': the list of Numpy arrays that you are passing to ' - 'your model is not the size the model expected. ' - 'Expected to see ' + str(len(names)) + ' array(s), ' - 'but instead got the following list of ' + - str(len(data)) + ' arrays: ' + str(data)[:200] + '...') - elif len(names) > 1: - raise ValueError( - 'Error when checking model ' + exception_prefix + - ': you are passing a list as input to your model, ' - 'but the model expects a list of ' + str(len(names)) + - ' Numpy arrays instead. The list you passed was: ' + str(data)[:200]) - elif len(data) == 1 and not hasattr(data[0], 'shape'): - raise TypeError('Error when checking model ' + exception_prefix + - ': data should be a Numpy array, or list/dict of ' - 'Numpy arrays. 
Found: ' + str(data)[:200] + '...') - elif len(names) == 1: - data = [np.asarray(data)] - - # Check shapes compatibility. - if shapes: - for i in range(len(names)): - if shapes[i] is not None: - data_shape = data[i].shape - shape = shapes[i] - if data[i].ndim != len(shape): - raise ValueError('Error when checking ' + exception_prefix + - ': expected ' + names[i] + ' to have ' + - str(len(shape)) + ' dimensions, but got array ' - 'with shape ' + str(data_shape)) - if not check_batch_axis: - data_shape = data_shape[1:] - shape = shape[1:] - for dim, ref_dim in zip(data_shape, shape): - if ref_dim != dim and ref_dim: - raise ValueError( - 'Error when checking ' + exception_prefix + ': expected ' + - names[i] + ' to have shape ' + str(shape) + - ' but got array with shape ' + str(data_shape)) - return data - - -def _standardize_sample_or_class_weights(x_weight, output_names, weight_type): - """Maps `sample_weight` or `class_weight` to model outputs. - - Arguments: - x_weight: User-provided `sample_weight` or `class_weight` argument. - output_names: List of output names (strings) in the model. - weight_type: A string used purely for exception printing. - - Returns: - A list of `sample_weight` or `class_weight` where there are exactly - one element per model output. - - Raises: - ValueError: In case of invalid user-provided argument. - """ - if x_weight is None or len(x_weight) == 0: # pylint: disable=g-explicit-length-test - return [None for _ in output_names] - if len(output_names) == 1: - if isinstance(x_weight, list) and len(x_weight) == 1: - return x_weight - if isinstance(x_weight, dict) and output_names[0] in x_weight: - return [x_weight[output_names[0]]] - else: - return [x_weight] - if isinstance(x_weight, list): - if len(x_weight) != len(output_names): - raise ValueError('Provided `' + weight_type + '` was a list of ' + - str(len(x_weight)) + ' elements, but the model has ' + - str(len(output_names)) + ' outputs. 
' - 'You should provide one `' + weight_type + '`' - 'array per model output.') - return x_weight - if isinstance(x_weight, dict): - x_weights = [] - for name in output_names: - x_weights.append(x_weight.get(name)) - return x_weights - else: - raise TypeError( - 'The model has multiple outputs, so `' + weight_type + '` ' - 'should be either a list or a dict. ' - 'Provided `' + weight_type + '` type not understood: ' + str(x_weight)) - - -def _standardize_class_weights(class_weight, output_names): - return _standardize_sample_or_class_weights(class_weight, output_names, - 'class_weight') - - -def _standardize_sample_weights(sample_weight, output_names): - return _standardize_sample_or_class_weights(sample_weight, output_names, - 'sample_weight') - - -def _check_array_lengths(inputs, targets, weights=None): - """Does user input validation for numpy arrays. - - Arguments: - inputs: list of Numpy arrays of inputs. - targets: list of Numpy arrays of targets. - weights: list of Numpy arrays of sample weights. - - Raises: - ValueError: in case of incorrectly formatted data. - """ - - def set_of_lengths(x): - # return a set with the variation between - # different shapes, with None => 0 - if x is None: - return {} - else: - return set([y.shape[0] for y in x if y is not None]) - - set_x = set_of_lengths(inputs) - set_y = set_of_lengths(targets) - set_w = set_of_lengths(weights) - if len(set_x) > 1: - raise ValueError('All input arrays (x) should have ' - 'the same number of samples. Got array shapes: ' + - str([x.shape for x in inputs])) - if len(set_y) > 1: - raise ValueError('All target arrays (y) should have ' - 'the same number of samples. Got array shapes: ' + - str([y.shape for y in targets])) - if set_x and set_y and list(set_x)[0] != list(set_y)[0]: - raise ValueError('Input arrays should have ' - 'the same number of samples as target arrays. 
' - 'Found ' + str(list(set_x)[0]) + ' input samples ' - 'and ' + str(list(set_y)[0]) + ' target samples.') - if len(set_w) > 1: - raise ValueError('All sample_weight arrays should have ' - 'the same number of samples. Got array shapes: ' + - str([w.shape for w in weights])) - if set_y and set_w and list(set_y)[0] != list(set_w)[0]: - raise ValueError('Sample_weight arrays should have ' - 'the same number of samples as target arrays. Got ' + - str(list(set_y)[0]) + ' input samples and ' + - str(list(set_w)[0]) + ' target samples.') - - -def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): - """Does validation on the compatibility of targets and loss functions. - - This helps prevent users from using loss functions incorrectly. This check - is purely for UX purposes. - - Arguments: - targets: list of Numpy arrays of targets. - loss_fns: list of loss functions. - output_shapes: list of shapes of model outputs. - - Raises: - ValueError: if a loss function or target array - is incompatible with an output. - """ - key_losses = { - losses.mean_squared_error, losses.binary_crossentropy, - losses.categorical_crossentropy - } - for y, loss, shape in zip(targets, loss_fns, output_shapes): - if y is None or loss is None or tensor_util.is_tensor(y): - continue - if loss is losses.categorical_crossentropy: - if y.shape[-1] == 1: - raise ValueError('You are passing a target array of shape ' + str( - y.shape) + ' while using as loss `categorical_crossentropy`. ' - '`categorical_crossentropy` expects ' - 'targets to be binary matrices (1s and 0s) ' - 'of shape (samples, classes). 
' - 'If your targets are integer classes, ' - 'you can convert them to the expected format via:\n' - '```\n' - 'from keras.utils import to_categorical\n' - 'y_binary = to_categorical(y_int)\n' - '```\n' - '\n' - 'Alternatively, you can use the loss function ' - '`sparse_categorical_crossentropy` instead, ' - 'which does expect integer targets.') - if loss in key_losses: - for target_dim, out_dim in zip(y.shape[1:], shape[1:]): - if out_dim is not None and target_dim != out_dim: - raise ValueError('A target array with shape ' + str(y.shape) + - ' was passed for an output of shape ' + str(shape) + - ' while using as loss `' + loss.__name__ + '`. ' - 'This loss expects ' - 'targets to have the same shape ' - 'as the output.') - - -def _collect_metrics(metrics, output_names): - """Maps metric functions to model outputs. - - Arguments: - metrics: a list or dict of metric functions. - output_names: a list of the names (strings) of model outputs. - - Returns: - A list (one entry per model output) of lists of metric functions. - For instance, if the model has 2 outputs, and for the first output - we want to compute "binary_accuracy" and "binary_crossentropy", - and just "binary_accuracy" for the second output, - the list would look like: - `[[binary_accuracy, binary_crossentropy], [binary_accuracy]]` - - Raises: - TypeError: if an incorrect type is passed for the `metrics` argument. - """ - if not metrics: - return [[] for _ in output_names] - if isinstance(metrics, list): - # we then apply all metrics to all outputs. - return [copy.copy(metrics) for _ in output_names] - elif isinstance(metrics, dict): - nested_metrics = [] - for name in output_names: - output_metrics = metrics.get(name, []) - if not isinstance(output_metrics, list): - output_metrics = [output_metrics] - nested_metrics.append(output_metrics) - return nested_metrics - else: - raise TypeError('Type of `metrics` argument not understood. 
' - 'Expected a list or dictionary, found: ' + str(metrics)) - - -def _batch_shuffle(index_array, batch_size): - """Shuffles an array in a batch-wise fashion. - - Useful for shuffling HDF5 arrays - (where one cannot access arbitrary indices). - - Arguments: - index_array: array of indices to be shuffled. - batch_size: integer. - - Returns: - The `index_array` array, shuffled in a batch-wise fashion. - """ - batch_count = int(len(index_array) / batch_size) - # to reshape we need to be cleanly divisible by batch size - # we stash extra items and reappend them after shuffling - last_batch = index_array[batch_count * batch_size:] - index_array = index_array[:batch_count * batch_size] - index_array = index_array.reshape((batch_count, batch_size)) - np.random.shuffle(index_array) - index_array = index_array.flatten() - return np.append(index_array, last_batch) - - -def _weighted_masked_objective(fn): - """Adds support for masking and sample-weighting to an objective function. - - It transforms an objective function `fn(y_true, y_pred)` - into a sample-weighted, cost-masked objective function - `fn(y_true, y_pred, weights, mask)`. - - Arguments: - fn: The objective function to wrap, - with signature `fn(y_true, y_pred)`. - - Returns: - A function with signature `fn(y_true, y_pred, weights, mask)`. - """ - if fn is None: - return None - - def weighted(y_true, y_pred, weights, mask=None): - """Wrapper function. - - Arguments: - y_true: `y_true` argument of `fn`. - y_pred: `y_pred` argument of `fn`. - weights: Weights tensor. - mask: Mask tensor. - - Returns: - Scalar tensor. - """ - # score_array has ndim >= 2 - score_array = fn(y_true, y_pred) - if mask is not None: - # Cast the mask to floatX to avoid float64 upcasting in theano - mask = K.cast(mask, K.floatx()) - # mask should have the same shape as score_array - score_array *= mask - # the loss per batch should be proportional - # to the number of unmasked samples. 
- score_array /= K.mean(mask) - - # apply sample weighting - if weights is not None: - # reduce score_array to same ndim as weight array - ndim = K.ndim(score_array) - weight_ndim = K.ndim(weights) - score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim))) - score_array *= weights - score_array /= K.mean(K.cast(K.not_equal(weights, 0), K.floatx())) - return K.mean(score_array) - - return weighted - - -def _standardize_weights(y, - sample_weight=None, - class_weight=None, - sample_weight_mode=None): - """Performs sample weight validation and standardization. - - Everything gets normalized to a single sample-wise (or timestep-wise) - weight array. - - Arguments: - y: Numpy array of model targets to be weighted. - sample_weight: User-provided `sample_weight` argument. - class_weight: User-provided `class_weight` argument. - sample_weight_mode: One of `None` or `"temporal"`. - `"temporal"` indicated that we expect 2D weight data - that will be applied to the last 2 dimensions of - the targets (i.e. we are weighting timesteps, not samples). - - Returns: - A numpy array of target weights, one entry per sample to weight. - - Raises: - ValueError: In case of invalid user-provided arguments. - """ - if sample_weight_mode is not None: - if sample_weight_mode != 'temporal': - raise ValueError('"sample_weight_mode ' - 'should be None or "temporal". ' - 'Found: ' + str(sample_weight_mode)) - if len(y.shape) < 3: - raise ValueError('Found a sample_weight array for ' - 'an input with shape ' + str(y.shape) + '. ' - 'Timestep-wise sample weighting (use of ' - 'sample_weight_mode="temporal") is restricted to ' - 'outputs that are at least 3D, i.e. that have ' - 'a time dimension.') - if sample_weight is not None and len(sample_weight.shape) != 2: - raise ValueError('Found a sample_weight array with shape ' + - str(sample_weight.shape) + '. 
' - 'In order to use timestep-wise sample weighting, ' - 'you should pass a 2D sample_weight array.') - else: - if sample_weight is not None and len(sample_weight.shape) != 1: - raise ValueError('Found a sample_weight array with shape ' + - str(sample_weight.shape) + '. ' - 'In order to use timestep-wise sample weights, ' - 'you should specify ' - 'sample_weight_mode="temporal" ' - 'in compile(). If you just mean to use ' - 'sample-wise weights, make sure your ' - 'sample_weight array is 1D.') - - if sample_weight is not None: - if len(sample_weight.shape) > len(y.shape): - raise ValueError( - 'Found a sample_weight with shape' + str(sample_weight.shape) + '.' - 'Expected sample_weight with rank ' - 'less than or equal to ' + str(len(y.shape))) - - if y.shape[:sample_weight.ndim] != sample_weight.shape: - raise ValueError( - 'Found a sample_weight array with shape ' + str(sample_weight.shape) + - ' for an input with shape ' + str(y.shape) + '. ' - 'sample_weight cannot be broadcast.') - return sample_weight - elif isinstance(class_weight, dict): - if len(y.shape) > 2: - raise ValueError('`class_weight` not supported for ' - '3+ dimensional targets.') - if y.shape[1] > 1: - y_classes = np.argmax(y, axis=1) - elif y.shape[1] == 1: - y_classes = np.reshape(y, y.shape[0]) - else: - y_classes = y - - weights = np.asarray( - [class_weight[cls] for cls in y_classes if cls in class_weight]) - - if len(weights) != len(y_classes): - # subtract the sets to pick all missing classes - existing_classes = set(y_classes) - existing_class_weight = set(class_weight.keys()) - raise ValueError('`class_weight` must contain all classes in the data.' - ' The classes %s exist in the data but not in ' - '`class_weight`.' 
% - (existing_classes - existing_class_weight)) - return weights - else: - return None - @tf_export('keras.models.Model', 'keras.Model') class Model(Network): @@ -687,7 +216,8 @@ class Model(Network): loss_functions = [loss_function for _ in range(len(self.outputs))] self.loss_functions = loss_functions - weighted_losses = [_weighted_masked_objective(fn) for fn in loss_functions] + weighted_losses = [training_utils.weighted_masked_objective(fn) + for fn in loss_functions] skip_target_indices = [] skip_target_weighing_indices = [] self._feed_outputs = [] @@ -744,7 +274,8 @@ class Model(Network): for i in range(len(self.outputs)): if len(self.outputs) > 1: self.metrics_names.append(self.output_names[i] + '_loss') - self.nested_metrics = _collect_metrics(metrics, self.output_names) + self.nested_metrics = training_utils.collect_metrics(metrics, + self.output_names) self._feed_sample_weight_modes = [] for i in range(len(self.outputs)): self._feed_sample_weight_modes.append(None) @@ -914,9 +445,9 @@ class Model(Network): # List of same size as output_names. # contains tuples (metrics for output, names of metrics). 
- nested_metrics = _collect_metrics(metrics, self.output_names) - nested_weighted_metrics = _collect_metrics(weighted_metrics, - self.output_names) + nested_metrics = training_utils.collect_metrics(metrics, self.output_names) + nested_weighted_metrics = training_utils.collect_metrics(weighted_metrics, + self.output_names) self.metrics_updates = [] self.stateful_metric_names = [] with K.name_scope('metrics'): @@ -962,11 +493,13 @@ class Model(Network): suffix = 'acc' elif metric in ('crossentropy', 'ce'): suffix = 'ce' - weighted_metric_fn = _weighted_masked_objective(metric_fn) + weighted_metric_fn = training_utils.weighted_masked_objective( + metric_fn) metric_name = metric_name_prefix + suffix else: metric_fn = metrics_module.get(metric) - weighted_metric_fn = _weighted_masked_objective(metric_fn) + weighted_metric_fn = training_utils.weighted_masked_objective( + metric_fn) # Get metric name as string if hasattr(metric_fn, 'name'): metric_name = metric_fn.name @@ -1104,451 +637,6 @@ class Model(Network): name='predict_function', **kwargs) - def _check_num_samples(self, - ins, - batch_size=None, - steps=None, - steps_name='steps'): - """Determine the number of samples provided for training and evaluation. - - The number of samples is not defined when running with `steps`, - in which case the number of samples is set to `None`. - - Arguments: - ins: List of tensors to be fed to the Keras function. - batch_size: Integer batch size or `None` if not defined. - steps: Total number of steps (batches of samples) - before declaring `_predict_loop` finished. - Ignored with the default value of `None`. - steps_name: The public API's parameter name for `steps`. - - Raises: - ValueError: when `steps` is `None` and the attribute `ins.shape` - does not exist. Also raises ValueError when `steps` is not `None` - and `batch_size` is not `None` because they are mutually - exclusive. 
- - Returns: - When steps is `None`, returns the number of samples to be - processed based on the size of the first dimension of the - first input numpy array. When steps is not `None` and - `batch_size` is `None`, returns `None`. - - Raises: - ValueError: In case of invalid arguments. - """ - if steps is not None: - num_samples = None - if batch_size is not None: - raise ValueError( - 'If ' + steps_name + ' is set, the `batch_size` must be None.') - elif ins and hasattr(ins[0], 'shape'): - num_samples = ins[0].shape[0] - else: - raise ValueError( - 'Either the input data should have ' - 'a defined shape, or ' + steps_name + ' should be specified.') - return num_samples - - def _fit_loop(self, - f, - ins, - out_labels=None, - batch_size=None, - epochs=100, - verbose=1, - callbacks=None, - val_f=None, - val_ins=None, - shuffle=True, - callback_metrics=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None): - """Abstract fit function for `f(ins)`. - - Assume that f returns a list, labeled by out_labels. - - Arguments: - f: Keras function returning a list of tensors - ins: List of tensors to be fed to `f` - out_labels: List of strings, display names of - the outputs of `f` - batch_size: Integer batch size or None if unknown. - epochs: Number of times to iterate over the data - verbose: Verbosity mode, 0, 1 or 2 - callbacks: List of callbacks to be called during training - val_f: Keras function to call for validation - val_ins: List of tensors to be fed to `val_f` - shuffle: Whether to shuffle the data at the beginning of each epoch - callback_metrics: List of strings, the display names of the metrics - passed to the callbacks. They should be the - concatenation of list the display names of the outputs of - `f` and the list of display names of the outputs of `f_val`. 
- initial_epoch: Epoch at which to start training - (useful for resuming a previous training run) - steps_per_epoch: Total number of steps (batches of samples) - before declaring one epoch finished and starting the - next epoch. Ignored with the default value of `None`. - validation_steps: Number of steps to run validation for - (only if doing validation from data tensors). - Ignored with the default value of `None`. - - Returns: - `History` object. - - Raises: - ValueError: in case of invalid arguments. - """ - do_validation = False - if val_f and val_ins: - do_validation = True - if verbose and ins and hasattr(ins[0], 'shape') and hasattr( - val_ins[0], 'shape'): - print('Train on %d samples, validate on %d samples' % - (ins[0].shape[0], val_ins[0].shape[0])) - if validation_steps: - do_validation = True - if steps_per_epoch is None: - raise ValueError('Can only use `validation_steps` ' - 'when doing step-wise ' - 'training, i.e. `steps_per_epoch` ' - 'must be set.') - - num_train_samples = self._check_num_samples( - ins, batch_size, steps_per_epoch, 'steps_per_epoch') - if num_train_samples is not None: - index_array = np.arange(num_train_samples) - - self.history = cbks.History() - all_callbacks = [cbks.BaseLogger( - stateful_metrics=self.stateful_metric_names)] - if verbose: - if steps_per_epoch is not None: - count_mode = 'steps' - else: - count_mode = 'samples' - all_callbacks.append( - cbks.ProgbarLogger( - count_mode, stateful_metrics=self.stateful_metric_names)) - all_callbacks += (callbacks or []) + [self.history] - callbacks = cbks.CallbackList(all_callbacks) - out_labels = out_labels or [] - - # it's possible to callback a different model than self - # (used by Sequential models) - if hasattr(self, 'callback_model') and self.callback_model: - callback_model = self.callback_model - else: - callback_model = self - - callbacks.set_model(callback_model) - - callbacks.set_params({ - 'batch_size': batch_size, - 'epochs': epochs, - 'steps': steps_per_epoch, - 
'samples': num_train_samples, - 'verbose': verbose, - 'do_validation': do_validation, - 'metrics': callback_metrics or [], - }) - callbacks.on_train_begin() - callback_model.stop_training = False - for cbk in callbacks: - cbk.validation_data = val_ins - - # To prevent a slowdown, we find beforehand the arrays that need conversion. - feed = self._feed_inputs + self._feed_targets + self._feed_sample_weights - indices_for_conversion_to_dense = [] - for i in range(len(feed)): - if issparse is not None and issparse(ins[i]) and not K.is_sparse(feed[i]): - indices_for_conversion_to_dense.append(i) - - for epoch in range(initial_epoch, epochs): - # Reset stateful metrics - for m in self.metrics: - if isinstance(m, Layer): - m.reset_states() - # Update callbacks - callbacks.on_epoch_begin(epoch) - epoch_logs = {} - if steps_per_epoch is not None: - for step_index in range(steps_per_epoch): - batch_logs = {} - batch_logs['batch'] = step_index - batch_logs['size'] = 1 - callbacks.on_batch_begin(step_index, batch_logs) - outs = f(ins) - - if not isinstance(outs, list): - outs = [outs] - for l, o in zip(out_labels, outs): - batch_logs[l] = o - - callbacks.on_batch_end(step_index, batch_logs) - if callback_model.stop_training: - break - - if do_validation: - val_outs = self._test_loop( - val_f, - val_ins, - batch_size=batch_size, - steps=validation_steps, - verbose=0) - if not isinstance(val_outs, list): - val_outs = [val_outs] - # Same labels assumed. - for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o - else: - if shuffle == 'batch': - index_array = _batch_shuffle(index_array, batch_size) - elif shuffle: - np.random.shuffle(index_array) - - batches = make_batches(num_train_samples, batch_size) - - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - try: - if isinstance(ins[-1], int): - # Do not slice the training phase flag. 
- ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = slice_arrays(ins, batch_ids) - except TypeError: - raise TypeError('TypeError while preparing batch. ' - 'If using HDF5 input data, ' - 'pass shuffle="batch".') - batch_logs = {} - batch_logs['batch'] = batch_index - batch_logs['size'] = len(batch_ids) - callbacks.on_batch_begin(batch_index, batch_logs) - for i in indices_for_conversion_to_dense: - ins_batch[i] = ins_batch[i].toarray() - - outs = f(ins_batch) - if not isinstance(outs, list): - outs = [outs] - for l, o in zip(out_labels, outs): - batch_logs[l] = o - - callbacks.on_batch_end(batch_index, batch_logs) - if callback_model.stop_training: - break - - if batch_index == len(batches) - 1: # Last batch. - if do_validation: - val_outs = self._test_loop( - val_f, val_ins, batch_size=batch_size, verbose=0) - if not isinstance(val_outs, list): - val_outs = [val_outs] - # Same labels assumed. - for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o - callbacks.on_epoch_end(epoch, epoch_logs) - if callback_model.stop_training: - break - callbacks.on_train_end() - return self.history - - def _predict_loop(self, f, ins, batch_size=32, verbose=0, steps=None): - """Abstract method to loop over some data in batches. - - Arguments: - f: Keras function returning a list of tensors. - ins: list of tensors to be fed to `f`. - batch_size: integer batch size. - verbose: verbosity mode. - steps: Total number of steps (batches of samples) - before declaring `_predict_loop` finished. - Ignored with the default value of `None`. - - Returns: - Array of predictions (if the model has a single output) - or list of arrays of predictions - (if the model has multiple outputs). 
- """ - if hasattr(self, 'metrics'): - for m in self.metrics: - if isinstance(m, Layer): - m.reset_states() - - num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') - if verbose == 1: - if steps is not None: - progbar = Progbar(target=steps, - stateful_metrics=self.stateful_metric_names) - else: - progbar = Progbar(target=num_samples, - stateful_metrics=self.stateful_metric_names) - - indices_for_conversion_to_dense = [] - for i in range(len(self._feed_inputs)): - if (issparse is not None and issparse(ins[i]) and - not K.is_sparse(self._feed_inputs[i])): - indices_for_conversion_to_dense.append(i) - - if steps is not None: - # Step-based predictions. - # Since we do not know how many samples - # we will see, we cannot pre-allocate - # the returned Numpy arrays. - # Instead, we store one array per batch seen - # and concatenate them upon returning. - unconcatenated_outs = [] - for step in range(steps): - batch_outs = f(ins) - if not isinstance(batch_outs, list): - batch_outs = [batch_outs] - if step == 0: - for batch_out in batch_outs: - unconcatenated_outs.append([]) - for i, batch_out in enumerate(batch_outs): - unconcatenated_outs[i].append(batch_out) - if verbose == 1: - progbar.update(step + 1) - if len(unconcatenated_outs) == 1: - return np.concatenate(unconcatenated_outs[0], axis=0) - return [ - np.concatenate(unconcatenated_outs[i], axis=0) - for i in range(len(unconcatenated_outs)) - ] - else: - # Sample-based predictions. - outs = [] - batches = make_batches(num_samples, batch_size) - index_array = np.arange(num_samples) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - if ins and isinstance(ins[-1], int): - # Do not slice the training phase flag. 
- ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = slice_arrays(ins, batch_ids) - for i in indices_for_conversion_to_dense: - ins_batch[i] = ins_batch[i].toarray() - - batch_outs = f(ins_batch) - if not isinstance(batch_outs, list): - batch_outs = [batch_outs] - if batch_index == 0: - # Pre-allocate the results arrays. - for batch_out in batch_outs: - shape = (num_samples,) + batch_out.shape[1:] - outs.append(np.zeros(shape, dtype=batch_out.dtype)) - for i, batch_out in enumerate(batch_outs): - outs[i][batch_start:batch_end] = batch_out - if verbose == 1: - progbar.update(batch_end) - if len(outs) == 1: - return outs[0] - return outs - - def _test_loop(self, f, ins, batch_size=None, verbose=0, steps=None): - """Abstract method to loop over some data in batches. - - Arguments: - f: Keras function returning a list of tensors. - ins: list of tensors to be fed to `f`. - batch_size: integer batch size or `None`. - verbose: verbosity mode. - steps: Total number of steps (batches of samples) - before declaring predictions finished. - Ignored with the default value of `None`. - - Returns: - Scalar loss (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - """ - if hasattr(self, 'metrics'): - for m in self.metrics: - if isinstance(m, Layer): - m.reset_states() - stateful_metric_indices = [ - i for i, name in enumerate(self.metrics_names) - if str(name) in self.stateful_metric_names - ] - else: - stateful_metric_indices = [] - - num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') - outs = [] - if verbose == 1: - if steps is not None: - progbar = Progbar(target=steps) - else: - progbar = Progbar(target=num_samples) - - # To prevent a slowdown, we find beforehand the arrays that need conversion. 
- feed = self._feed_inputs + self._feed_targets + self._feed_sample_weights - indices_for_conversion_to_dense = [] - for i in range(len(feed)): - if issparse is not None and issparse(ins[i]) and not K.is_sparse(feed[i]): - indices_for_conversion_to_dense.append(i) - - if steps is not None: - for step in range(steps): - batch_outs = f(ins) - if isinstance(batch_outs, list): - if step == 0: - for _ in enumerate(batch_outs): - outs.append(0.) - for i, batch_out in enumerate(batch_outs): - if i in stateful_metric_indices: - outs[i] = batch_out - else: - outs[i] += batch_out - else: - if step == 0: - outs.append(0.) - outs[0] += batch_outs - if verbose == 1: - progbar.update(step + 1) - for i in range(len(outs)): - if i not in stateful_metric_indices: - outs[i] /= steps - else: - batches = make_batches(num_samples, batch_size) - index_array = np.arange(num_samples) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - if isinstance(ins[-1], int): - # Do not slice the training phase flag. - ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = slice_arrays(ins, batch_ids) - for i in indices_for_conversion_to_dense: - ins_batch[i] = ins_batch[i].toarray() - - batch_outs = f(ins_batch) - - if isinstance(batch_outs, list): - if batch_index == 0: - for batch_out in enumerate(batch_outs): - outs.append(0.) - for i, batch_out in enumerate(batch_outs): - if i in stateful_metric_indices: - outs[i] = batch_out - else: - outs[i] += batch_out * len(batch_ids) - else: - if batch_index == 0: - outs.append(0.) - outs[0] += batch_outs * len(batch_ids) - if verbose == 1: - progbar.update(batch_end) - for i in range(len(outs)): - if i not in stateful_metric_indices: - outs[i] /= num_samples - if len(outs) == 1: - return outs[0] - return outs - def _standardize_user_data(self, x, y=None, @@ -1688,7 +776,7 @@ class Model(Network): feed_input_shapes = self._feed_input_shapes # Standardize the inputs. 
- x = _standardize_input_data( + x = training_utils.standardize_input_data( x, feed_input_names, feed_input_shapes, @@ -1727,7 +815,7 @@ class Model(Network): feed_output_shapes.append(output_shape) # Standardize the outputs. - y = _standardize_input_data( + y = training_utils.standardize_input_data( y, feed_output_names, feed_output_shapes, @@ -1736,21 +824,21 @@ class Model(Network): # Generate sample-wise weight values given the `sample_weight` and # `class_weight` arguments. - sample_weights = _standardize_sample_weights(sample_weight, - feed_output_names) - class_weights = _standardize_class_weights(class_weight, - feed_output_names) + sample_weights = training_utils.standardize_sample_weights( + sample_weight, feed_output_names) + class_weights = training_utils.standardize_class_weights( + class_weight, feed_output_names) sample_weights = [ - _standardize_weights(ref, sw, cw, mode) + training_utils.standardize_weights(ref, sw, cw, mode) for (ref, sw, cw, mode) in zip(y, sample_weights, class_weights, feed_sample_weight_modes) ] # Check that all arrays have the same length. - _check_array_lengths(x, y, sample_weights) + training_utils.check_array_lengths(x, y, sample_weights) if self._is_graph_network and not context.in_eager_mode(): # Additional checks to avoid users mistakenly using improper loss fns. - _check_loss_and_target_compatibility(y, self._feed_loss_fns, - feed_output_shapes) + training_utils.check_loss_and_target_compatibility( + y, self._feed_loss_fns, feed_output_shapes) else: y = [] sample_weights = [] @@ -2052,10 +1140,7 @@ class Model(Network): class_weight=class_weight, batch_size=batch_size) # Prepare validation data. - do_validation = False - val_ins = [] if validation_data: - do_validation = True if len(validation_data) == 2: val_x, val_y = validation_data # pylint: disable=unpacking-non-sequence val_sample_weight = None @@ -2075,7 +1160,6 @@ class Model(Network): batch_size=batch_size) elif validation_split and 0. 
< validation_split < 1.: - do_validation = True if hasattr(x[0], 'shape'): split_at = int(x[0].shape[0] * (1. - validation_split)) else: @@ -2088,78 +1172,40 @@ class Model(Network): val_x = [] val_y = [] val_sample_weights = [] - do_validation = True - - # Prepare display labels. - out_labels = self.metrics_names + else: + val_x = None + val_y = None + val_sample_weights = None if context.in_eager_mode(): - if any([w is not None for w in sample_weights]): - raise ValueError('`sample_weight` and `class_weight` is not supported ' - 'when eager execution is enabled, for now.') - - if do_validation: - if any([w is not None for w in val_sample_weights]): - raise ValueError('`sample_weight` and `class_weight` is not supported' - ' when eager execution is enabled, for now.') - callback_metrics = copy.copy(out_labels) + [ - 'val_' + n for n in out_labels - ] - val_ins = val_x + val_y - else: - callback_metrics = copy.copy(out_labels) - return training_eager.fit_loop( self, - x + y, - out_labels=out_labels, + inputs=x, + targets=y, + sample_weights=sample_weights, batch_size=batch_size, epochs=epochs, verbose=verbose, callbacks=callbacks, - val_ins=val_ins, + val_inputs=val_x, + val_targets=val_y, + val_sample_weights=val_sample_weights, shuffle=shuffle, - callback_metrics=callback_metrics, initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps) else: - # Prepare input arrays and training function. 
- if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [1] - else: - ins = x + y + sample_weights - - self._make_train_function() - f = self.train_function - - if do_validation: - self._make_test_function() - val_f = self.test_function - callback_metrics = copy.copy(out_labels) + [ - 'val_' + n for n in out_labels - ] - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = val_x + val_y + val_sample_weights + [0] - else: - val_ins = val_x + val_y + val_sample_weights - else: - val_f = None - callback_metrics = copy.copy(out_labels) - - # Delegate logic to `_fit_loop`. - return self._fit_loop( - f, - ins, - out_labels=out_labels, + return training_arrays.fit_loop( + self, x, y, + sample_weights=sample_weights, batch_size=batch_size, epochs=epochs, verbose=verbose, callbacks=callbacks, - val_f=val_f, - val_ins=val_ins, + val_inputs=val_x, + val_targets=val_y, + val_sample_weights=val_sample_weights, shuffle=shuffle, - callback_metrics=callback_metrics, initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps) @@ -2235,22 +1281,13 @@ class Model(Network): batch_size=batch_size) if context.in_eager_mode(): - if any([w is not None for w in sample_weights]): - raise ValueError('`sample_weight` and `class_weight` is not supported ' - 'when eager execution is enabled, for now.') return training_eager.test_loop( - self, x + y, batch_size=batch_size, verbose=verbose, steps=steps) + self, inputs=x, targets=y, sample_weights=sample_weights, + batch_size=batch_size, verbose=verbose, steps=steps) else: - # Prepare inputs, delegate logic to `_test_loop`. 
- if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [0] - else: - ins = x + y + sample_weights - - self._make_test_function() - f = self.test_function - return self._test_loop( - f, ins, batch_size=batch_size, verbose=verbose, steps=steps) + return training_arrays.test_loop( + self, inputs=x, targets=y, sample_weights=sample_weights, + batch_size=batch_size, verbose=verbose, steps=steps) def predict(self, x, batch_size=None, verbose=0, steps=None): """Generates output predictions for the input samples. @@ -2288,17 +1325,8 @@ class Model(Network): return training_eager.predict_loop( self, x, batch_size=batch_size, verbose=verbose, steps=steps) else: - # Prepare inputs, delegate logic to `_predict_loop`. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + [0] - else: - ins = x - - self._make_predict_function() - f = self.predict_function - - return self._predict_loop( - f, ins, batch_size=batch_size, verbose=verbose, steps=steps) + return training_arrays.predict_loop( + self, x, batch_size=batch_size, verbose=verbose, steps=steps) def train_on_batch(self, x, y, sample_weight=None, class_weight=None): """Runs a single gradient update on a single batch of data. 
@@ -2345,10 +1373,8 @@ class Model(Network): class_weight=class_weight) if context.in_eager_mode(): - if any([w is not None for w in sample_weights]): - raise ValueError('`sample_weight` and `class_weight` is not supported ' - 'when eager execution is enabled, for now.') - outputs = training_eager.train_on_batch(self, x + y) + outputs = training_eager.train_on_batch( + self, x, y, sample_weights=sample_weights) else: if self.uses_learning_phase and not isinstance(K.learning_phase(), int): ins = x + y + sample_weights + [1] @@ -2397,10 +1423,8 @@ class Model(Network): x, y, sample_weight=sample_weight) if context.in_eager_mode(): - if any([w is not None for w in sample_weights]): - raise ValueError('`sample_weight` and `class_weight` is not supported ' - 'when eager execution is enabled, for now.') - outputs = training_eager.test_on_batch(self, x + y) + outputs = training_eager.test_on_batch( + self, x, y, sample_weights=sample_weights) else: if self.uses_learning_phase and not isinstance(K.learning_phase(), int): ins = x + y + sample_weights + [0] @@ -2426,16 +1450,8 @@ class Model(Network): x, _, _ = self._standardize_user_data(x) if context.in_eager_mode(): - ins_batch_converted = [] - for ib in x: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) - - eager_model_inputs = [] - for i in range(len(self.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) - - outs = self(eager_model_inputs) # pylint: disable=not-callable - return outs + inputs = [ops.convert_to_tensor(val, dtype=K.floatx()) for val in x] + return self(inputs) # pylint: disable=not-callable if context.in_graph_mode(): if self.uses_learning_phase and not isinstance(K.learning_phase(), int): @@ -2445,6 +1461,7 @@ class Model(Network): self._make_predict_function() outputs = self.predict_function(ins) + if len(outputs) == 1: return outputs[0] return outputs @@ -2560,213 +1577,21 @@ class Model(Network): raise NotImplementedError( '`fit_generator` is not yet enabled for 
Model subclasses') - wait_time = 0.01 # in seconds - epoch = initial_epoch - - do_validation = bool(validation_data) - self._make_train_function() - if do_validation: - self._make_test_function() - - is_sequence = isinstance(generator, Sequence) - if not is_sequence and use_multiprocessing and workers > 1: - logging.warning( - UserWarning('Using a generator with `use_multiprocessing=True`' - ' and multiple workers may duplicate your data.' - ' Please consider using the`keras.utils.Sequence' - ' class.')) - if steps_per_epoch is None: - if is_sequence: - steps_per_epoch = len(generator) - else: - raise ValueError('`steps_per_epoch=None` is only valid for a' - ' generator based on the `keras.utils.Sequence`' - ' class. Please specify `steps_per_epoch` or use' - ' the `keras.utils.Sequence` class.') - - # python 2 has 'next', 3 has '__next__' - # avoid any explicit version checks - val_gen = ( - hasattr(validation_data, 'next') or - hasattr(validation_data, '__next__') or - isinstance(validation_data, Sequence)) - if (val_gen and not isinstance(validation_data, Sequence) and - not validation_steps): - raise ValueError('`validation_steps=None` is only valid for a' - ' generator based on the `keras.utils.Sequence`' - ' class. Please specify `validation_steps` or use' - ' the `keras.utils.Sequence` class.') - - # Prepare display labels. 
- out_labels = self.metrics_names - callback_metrics = out_labels + ['val_%s' % n for n in out_labels] - - # prepare callbacks - self.history = cbks.History() - callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history] - if verbose: - callbacks += [cbks.ProgbarLogger(count_mode='steps')] - callbacks = cbks.CallbackList(callbacks) - - # it's possible to callback a different model than self: - if hasattr(self, 'callback_model') and self.callback_model: - callback_model = self.callback_model - else: - callback_model = self - callbacks.set_model(callback_model) - callbacks.set_params({ - 'epochs': epochs, - 'steps': steps_per_epoch, - 'verbose': verbose, - 'do_validation': do_validation, - 'metrics': callback_metrics, - }) - callbacks.on_train_begin() - - enqueuer = None - val_enqueuer = None - - try: - if do_validation: - if val_gen: - if workers > 0: - if isinstance(validation_data, Sequence): - val_enqueuer = OrderedEnqueuer( - validation_data, use_multiprocessing=use_multiprocessing) - if validation_steps is None: - validation_steps = len(validation_data) - else: - val_enqueuer = GeneratorEnqueuer( - validation_data, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) - val_enqueuer.start(workers=workers, max_queue_size=max_queue_size) - validation_generator = val_enqueuer.get() - else: - validation_generator = validation_data - else: - if len(validation_data) == 2: - val_x, val_y = validation_data # pylint: disable=unpacking-non-sequence - val_sample_weight = None - elif len(validation_data) == 3: - val_x, val_y, val_sample_weight = validation_data # pylint: disable=unpacking-non-sequence - else: - raise ValueError( - '`validation_data` should be a tuple ' - '`(val_x, val_y, val_sample_weight)` ' - 'or `(val_x, val_y)`. 
Found: ' + str(validation_data)) - val_x, val_y, val_sample_weights = self._standardize_user_data( - val_x, val_y, val_sample_weight) - val_data = val_x + val_y + val_sample_weights - if self.uses_learning_phase and not isinstance( - K.learning_phase(), int): - val_data += [0] - for cbk in callbacks: - cbk.validation_data = val_data - - if workers > 0: - if is_sequence: - enqueuer = OrderedEnqueuer( - generator, - use_multiprocessing=use_multiprocessing, - shuffle=shuffle) - else: - enqueuer = GeneratorEnqueuer( - generator, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - output_generator = enqueuer.get() - else: - output_generator = generator - - callback_model.stop_training = False - # Construct epoch logs. - epoch_logs = {} - while epoch < epochs: - callbacks.on_epoch_begin(epoch) - steps_done = 0 - batch_index = 0 - while steps_done < steps_per_epoch: - generator_output = next(output_generator) - - if not hasattr(generator_output, '__len__'): - raise ValueError('Output of generator should be ' - 'a tuple `(x, y, sample_weight)` ' - 'or `(x, y)`. Found: ' + str(generator_output)) - - if len(generator_output) == 2: - x, y = generator_output - sample_weight = None - elif len(generator_output) == 3: - x, y, sample_weight = generator_output - else: - raise ValueError('Output of generator should be ' - 'a tuple `(x, y, sample_weight)` ' - 'or `(x, y)`. 
Found: ' + str(generator_output)) - # build batch logs - batch_logs = {} - if isinstance(x, list): - batch_size = x[0].shape[0] - elif isinstance(x, dict): - batch_size = list(x.values())[0].shape[0] - else: - batch_size = x.shape[0] - batch_logs['batch'] = batch_index - batch_logs['size'] = batch_size - callbacks.on_batch_begin(batch_index, batch_logs) - - outs = self.train_on_batch( - x, y, sample_weight=sample_weight, class_weight=class_weight) - - if not isinstance(outs, list): - outs = [outs] - for l, o in zip(out_labels, outs): - batch_logs[l] = o - - callbacks.on_batch_end(batch_index, batch_logs) - - batch_index += 1 - steps_done += 1 - - # Epoch finished. - if steps_done >= steps_per_epoch and do_validation: - if val_gen: - val_outs = self.evaluate_generator( - validation_generator, validation_steps, workers=0) - else: - # No need for try/except because - # data has already been validated. - val_outs = self.evaluate( - val_x, - val_y, - batch_size=batch_size, - sample_weight=val_sample_weights, - verbose=0) - if not isinstance(val_outs, list): - val_outs = [val_outs] - # Same labels assumed. 
- for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o - - if callback_model.stop_training: - break - - callbacks.on_epoch_end(epoch, epoch_logs) - epoch += 1 - if callback_model.stop_training: - break - - finally: - try: - if enqueuer is not None: - enqueuer.stop() - finally: - if val_enqueuer is not None: - val_enqueuer.stop() - - callbacks.on_train_end() - return self.history + return training_generator.fit_generator( + self, + generator, + steps_per_epoch=steps_per_epoch, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=validation_steps, + class_weight=class_weight, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle, + initial_epoch=initial_epoch) def evaluate_generator(self, generator, @@ -2819,87 +1644,13 @@ class Model(Network): raise NotImplementedError( '`evaluate_generator` is not yet enabled for Model subclasses') - self._make_test_function() - - steps_done = 0 - wait_time = 0.01 - all_outs = [] - batch_sizes = [] - is_sequence = isinstance(generator, Sequence) - if not is_sequence and use_multiprocessing and workers > 1: - logging.warning( - UserWarning('Using a generator with `use_multiprocessing=True`' - ' and multiple workers may duplicate your data.' - ' Please consider using the`keras.utils.Sequence' - ' class.')) - if steps is None: - if is_sequence: - steps = len(generator) - else: - raise ValueError('`steps=None` is only valid for a generator' - ' based on the `keras.utils.Sequence` class.' 
- ' Please specify `steps` or use the' - ' `keras.utils.Sequence` class.') - enqueuer = None - - try: - if workers > 0: - if is_sequence: - enqueuer = OrderedEnqueuer( - generator, use_multiprocessing=use_multiprocessing) - else: - enqueuer = GeneratorEnqueuer( - generator, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - output_generator = enqueuer.get() - else: - output_generator = generator - - while steps_done < steps: - generator_output = next(output_generator) - if not hasattr(generator_output, '__len__'): - raise ValueError('Output of generator should be a tuple ' - '(x, y, sample_weight) ' - 'or (x, y). Found: ' + str(generator_output)) - if len(generator_output) == 2: - x, y = generator_output - sample_weight = None - elif len(generator_output) == 3: - x, y, sample_weight = generator_output - else: - raise ValueError('Output of generator should be a tuple ' - '(x, y, sample_weight) ' - 'or (x, y). Found: ' + str(generator_output)) - outs = self.test_on_batch(x, y, sample_weight=sample_weight) - - if isinstance(x, list): - batch_size = x[0].shape[0] - elif isinstance(x, dict): - batch_size = list(x.values())[0].shape[0] - else: - batch_size = x.shape[0] - if batch_size == 0: - raise ValueError('Received an empty batch. 
' - 'Batches should at least contain one item.') - all_outs.append(outs) - - steps_done += 1 - batch_sizes.append(batch_size) - - finally: - if enqueuer is not None: - enqueuer.stop() - - if not isinstance(outs, list): - return np.average(np.asarray(all_outs), weights=batch_sizes) - else: - averages = [] - for i in range(len(outs)): - averages.append( - np.average([out[i] for out in all_outs], weights=batch_sizes)) - return averages + return training_generator.evaluate_generator( + self, + generator, + steps=steps, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing) def predict_generator(self, generator, @@ -2947,88 +1698,11 @@ class Model(Network): raise NotImplementedError( '`predict_generator` is not yet enabled for Model subclasses') - self._make_predict_function() - - steps_done = 0 - wait_time = 0.01 - all_outs = [] - is_sequence = isinstance(generator, Sequence) - if not is_sequence and use_multiprocessing and workers > 1: - logging.warning( - UserWarning('Using a generator with `use_multiprocessing=True`' - ' and multiple workers may duplicate your data.' - ' Please consider using the`keras.utils.Sequence' - ' class.')) - if steps is None: - if is_sequence: - steps = len(generator) - else: - raise ValueError('`steps=None` is only valid for a generator' - ' based on the `keras.utils.Sequence` class.' 
- ' Please specify `steps` or use the' - ' `keras.utils.Sequence` class.') - enqueuer = None - - try: - if workers > 0: - if is_sequence: - enqueuer = OrderedEnqueuer( - generator, use_multiprocessing=use_multiprocessing) - else: - enqueuer = GeneratorEnqueuer( - generator, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - output_generator = enqueuer.get() - else: - output_generator = generator - - if verbose == 1: - progbar = Progbar(target=steps) - - while steps_done < steps: - generator_output = next(output_generator) - if isinstance(generator_output, tuple): - # Compatibility with the generators - # used for training. - if len(generator_output) == 2: - x, _ = generator_output - elif len(generator_output) == 3: - x, _, _ = generator_output - else: - raise ValueError('Output of generator should be ' - 'a tuple `(x, y, sample_weight)` ' - 'or `(x, y)`. Found: ' + str(generator_output)) - else: - # Assumes a generator that only - # yields inputs (not targets and sample weights). 
- x = generator_output - - outs = self.predict_on_batch(x) - if not isinstance(outs, list): - outs = [outs] - - if not all_outs: - for out in outs: - all_outs.append([]) - - for i, out in enumerate(outs): - all_outs[i].append(out) - steps_done += 1 - if verbose == 1: - progbar.update(steps_done) - - finally: - if enqueuer is not None: - enqueuer.stop() - - if len(all_outs) == 1: - if steps_done == 1: - return all_outs[0][0] - else: - return np.concatenate(all_outs[0]) - if steps_done == 1: - return [out[0] for out in all_outs] - else: - return [np.concatenate(out) for out in all_outs] + return training_generator.predict_generator( + self, + generator, + steps=steps, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + verbose=verbose) diff --git a/tensorflow/python/keras/_impl/keras/engine/training_arrays.py b/tensorflow/python/keras/_impl/keras/engine/training_arrays.py new file mode 100644 index 0000000000..9291ef5fe6 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/training_arrays.py @@ -0,0 +1,495 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Part of the Keras training engine related to plain array data. 
+""" +# pylint: disable=protected-access +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy + +import numpy as np + +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import callbacks as cbks +from tensorflow.python.keras._impl.keras.engine import training_utils +from tensorflow.python.keras._impl.keras.engine.base_layer import Layer +from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches +from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar +from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays + +try: + from scipy.sparse import issparse # pylint: disable=g-import-not-at-top +except ImportError: + issparse = None + + +def fit_loop(model, + inputs, + targets, + sample_weights=None, + batch_size=None, + epochs=100, + verbose=1, + callbacks=None, + val_inputs=None, + val_targets=None, + val_sample_weights=None, + shuffle=True, + callback_metrics=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None): + """Abstract fit function for arrays of data. + + Arguments: + model: Keras Model instance. + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. + batch_size: Integer batch size or None if unknown. + epochs: Number of times to iterate over the data + verbose: Verbosity mode, 0, 1 or 2 + callbacks: List of callbacks to be called during training + val_inputs: List of input arrays. + val_targets: List of target arrays. + val_sample_weights: Optional list of sample weight arrays. + shuffle: Whether to shuffle the data at the beginning of each epoch + callback_metrics: List of strings, the display names of the metrics + passed to the callbacks. They should be the + concatenation of list the display names of the outputs of + `f` and the list of display names of the outputs of `f_val`. 
+ initial_epoch: Epoch at which to start training + (useful for resuming a previous training run) + steps_per_epoch: Total number of steps (batches of samples) + before declaring one epoch finished and starting the + next epoch. Ignored with the default value of `None`. + validation_steps: Number of steps to run validation for + (only if doing validation from data tensors). + Ignored with the default value of `None`. + + Returns: + `History` object. + + Raises: + ValueError: in case of invalid arguments. + """ + model._make_train_function() + f = model.train_function + + sample_weights = sample_weights or [] + val_sample_weights = val_sample_weights or [] + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = inputs + targets + sample_weights + [1] + if val_inputs: + val_ins = val_inputs + val_targets + val_sample_weights + [1] + else: + ins = inputs + targets + sample_weights + if val_inputs: + val_ins = val_inputs + val_targets + val_sample_weights + if not val_inputs: + val_ins = [] + + do_validation = False + if val_inputs: + do_validation = True + if verbose and inputs and hasattr(inputs[0], 'shape') and hasattr( + val_inputs[0], 'shape'): + print('Train on %d samples, validate on %d samples' % + (inputs[0].shape[0], val_inputs[0].shape[0])) + if validation_steps: + do_validation = True + if steps_per_epoch is None: + raise ValueError('Can only use `validation_steps` ' + 'when doing step-wise ' + 'training, i.e. 
`steps_per_epoch` ' + 'must be set.') + + out_labels = model.metrics_names + if do_validation: + callback_metrics = copy.copy(out_labels) + [ + 'val_' + n for n in out_labels + ] + else: + callback_metrics = copy.copy(out_labels) + + num_train_samples = training_utils.check_num_samples( + ins, batch_size, steps_per_epoch, 'steps_per_epoch') + if num_train_samples is not None: + index_array = np.arange(num_train_samples) + + model.history = cbks.History() + all_callbacks = [cbks.BaseLogger( + stateful_metrics=model.stateful_metric_names)] + if verbose: + if steps_per_epoch is not None: + count_mode = 'steps' + else: + count_mode = 'samples' + all_callbacks.append( + cbks.ProgbarLogger( + count_mode, stateful_metrics=model.stateful_metric_names)) + all_callbacks += (callbacks or []) + [model.history] + callbacks = cbks.CallbackList(all_callbacks) + out_labels = out_labels or [] + + # it's possible to callback a different model than self + # (used by Sequential models) + if hasattr(model, 'callback_model') and model.callback_model: + callback_model = model.callback_model + else: + callback_model = model + + callbacks.set_model(callback_model) + + callbacks.set_params({ + 'batch_size': batch_size, + 'epochs': epochs, + 'steps': steps_per_epoch, + 'samples': num_train_samples, + 'verbose': verbose, + 'do_validation': do_validation, + 'metrics': callback_metrics or [], + }) + callbacks.on_train_begin() + callback_model.stop_training = False + for cbk in callbacks: + cbk.validation_data = val_ins + + # To prevent a slowdown, we find beforehand the arrays that need conversion. 
+ feed = model._feed_inputs + model._feed_targets + model._feed_sample_weights + indices_for_conversion_to_dense = [] + for i in range(len(feed)): + if issparse is not None and issparse(ins[i]) and not K.is_sparse(feed[i]): + indices_for_conversion_to_dense.append(i) + + for epoch in range(initial_epoch, epochs): + # Reset stateful metrics + for m in model.metrics: + if isinstance(m, Layer): + m.reset_states() + # Update callbacks + callbacks.on_epoch_begin(epoch) + epoch_logs = {} + if steps_per_epoch is not None: + for step_index in range(steps_per_epoch): + batch_logs = {} + batch_logs['batch'] = step_index + batch_logs['size'] = 1 + callbacks.on_batch_begin(step_index, batch_logs) + outs = f(ins) + + if not isinstance(outs, list): + outs = [outs] + for l, o in zip(out_labels, outs): + batch_logs[l] = o + + callbacks.on_batch_end(step_index, batch_logs) + if callback_model.stop_training: + break + + if do_validation: + val_outs = test_loop( + model, + val_inputs, + val_targets, + sample_weights=val_sample_weights, + batch_size=batch_size, + steps=validation_steps, + verbose=0) + if not isinstance(val_outs, list): + val_outs = [val_outs] + # Same labels assumed. + for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o + else: + if shuffle == 'batch': + index_array = training_utils.batch_shuffle(index_array, batch_size) + elif shuffle: + np.random.shuffle(index_array) + + batches = make_batches(num_train_samples, batch_size) + + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + try: + if isinstance(ins[-1], int): + # Do not slice the training phase flag. + ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + else: + ins_batch = slice_arrays(ins, batch_ids) + except TypeError: + raise TypeError('TypeError while preparing batch. 
' + 'If using HDF5 input data, ' + 'pass shuffle="batch".') + batch_logs = {} + batch_logs['batch'] = batch_index + batch_logs['size'] = len(batch_ids) + callbacks.on_batch_begin(batch_index, batch_logs) + for i in indices_for_conversion_to_dense: + ins_batch[i] = ins_batch[i].toarray() + + outs = f(ins_batch) + if not isinstance(outs, list): + outs = [outs] + for l, o in zip(out_labels, outs): + batch_logs[l] = o + + callbacks.on_batch_end(batch_index, batch_logs) + if callback_model.stop_training: + break + + if batch_index == len(batches) - 1: # Last batch. + if do_validation: + val_outs = test_loop( + model, + val_inputs, + val_targets, + sample_weights=val_sample_weights, + batch_size=batch_size, + verbose=0) + if not isinstance(val_outs, list): + val_outs = [val_outs] + # Same labels assumed. + for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o + callbacks.on_epoch_end(epoch, epoch_logs) + if callback_model.stop_training: + break + callbacks.on_train_end() + return model.history + + +def predict_loop(model, inputs, batch_size=32, verbose=0, steps=None): + """Abstract method to loop over some data in batches. + + Arguments: + model: Keras Model instance. + inputs: list of tensors to be fed to `f`. + batch_size: integer batch size. + verbose: verbosity mode. + steps: Total number of steps (batches of samples) + before declaring `_predict_loop` finished. + Ignored with the default value of `None`. + + Returns: + Array of predictions (if the model has a single output) + or list of arrays of predictions + (if the model has multiple outputs). 
+ """ + model._make_predict_function() + f = model.predict_function + + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = inputs + [0] + else: + ins = inputs + + if hasattr(model, 'metrics'): + for m in model.metrics: + if isinstance(m, Layer): + m.reset_states() + + num_samples = training_utils.check_num_samples( + inputs, batch_size, steps, 'steps') + if verbose == 1: + if steps is not None: + progbar = Progbar(target=steps, + stateful_metrics=model.stateful_metric_names) + else: + progbar = Progbar(target=num_samples, + stateful_metrics=model.stateful_metric_names) + + indices_for_conversion_to_dense = [] + for i in range(len(model._feed_inputs)): + if (issparse is not None and issparse(inputs[i]) and + not K.is_sparse(model._feed_inputs[i])): + indices_for_conversion_to_dense.append(i) + + if steps is not None: + # Step-based predictions. + # Since we do not know how many samples + # we will see, we cannot pre-allocate + # the returned Numpy arrays. + # Instead, we store one array per batch seen + # and concatenate them upon returning. + unconcatenated_outs = [] + for step in range(steps): + batch_outs = f(ins) + if not isinstance(batch_outs, list): + batch_outs = [batch_outs] + if step == 0: + for batch_out in batch_outs: + unconcatenated_outs.append([]) + for i, batch_out in enumerate(batch_outs): + unconcatenated_outs[i].append(batch_out) + if verbose == 1: + progbar.update(step + 1) + if len(unconcatenated_outs) == 1: + return np.concatenate(unconcatenated_outs[0], axis=0) + return [ + np.concatenate(unconcatenated_outs[i], axis=0) + for i in range(len(unconcatenated_outs)) + ] + else: + # Sample-based predictions. + outs = [] + batches = make_batches(num_samples, batch_size) + index_array = np.arange(num_samples) + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + if ins and isinstance(ins[-1], int): + # Do not slice the training phase flag. 
+ ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + else: + ins_batch = slice_arrays(ins, batch_ids) + for i in indices_for_conversion_to_dense: + ins_batch[i] = ins_batch[i].toarray() + + batch_outs = f(ins_batch) + if not isinstance(batch_outs, list): + batch_outs = [batch_outs] + if batch_index == 0: + # Pre-allocate the results arrays. + for batch_out in batch_outs: + shape = (num_samples,) + batch_out.shape[1:] + outs.append(np.zeros(shape, dtype=batch_out.dtype)) + for i, batch_out in enumerate(batch_outs): + outs[i][batch_start:batch_end] = batch_out + if verbose == 1: + progbar.update(batch_end) + if len(outs) == 1: + return outs[0] + return outs + + +def test_loop(model, inputs, targets, + sample_weights=None, + batch_size=None, + verbose=0, + steps=None): + """Abstract method to loop over some data in batches. + + Arguments: + model: Keras Model instance. + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. + batch_size: integer batch size or `None`. + verbose: verbosity mode. + steps: Total number of steps (batches of samples) + before declaring predictions finished. + Ignored with the default value of `None`. + + Returns: + Scalar loss (if the model has a single output and no metrics) + or list of scalars (if the model has multiple outputs + and/or metrics). The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. 
+ """ + model._make_test_function() + f = model.test_function + + sample_weights = sample_weights or [] + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = inputs + targets + sample_weights + [0] + else: + ins = inputs + targets + sample_weights + + if hasattr(model, 'metrics'): + for m in model.metrics: + if isinstance(m, Layer): + m.reset_states() + stateful_metric_indices = [ + i for i, name in enumerate(model.metrics_names) + if str(name) in model.stateful_metric_names + ] + else: + stateful_metric_indices = [] + + num_samples = training_utils.check_num_samples( + ins, batch_size, steps, 'steps') + outs = [] + if verbose == 1: + if steps is not None: + progbar = Progbar(target=steps) + else: + progbar = Progbar(target=num_samples) + + # To prevent a slowdown, we find beforehand the arrays that need conversion. + feed = model._feed_inputs + model._feed_targets + model._feed_sample_weights + indices_for_conversion_to_dense = [] + for i in range(len(feed)): + if issparse is not None and issparse(ins[i]) and not K.is_sparse(feed[i]): + indices_for_conversion_to_dense.append(i) + + if steps is not None: + for step in range(steps): + batch_outs = f(ins) + if isinstance(batch_outs, list): + if step == 0: + for _ in enumerate(batch_outs): + outs.append(0.) + for i, batch_out in enumerate(batch_outs): + if i in stateful_metric_indices: + outs[i] = batch_out + else: + outs[i] += batch_out + else: + if step == 0: + outs.append(0.) + outs[0] += batch_outs + if verbose == 1: + progbar.update(step + 1) + for i in range(len(outs)): + if i not in stateful_metric_indices: + outs[i] /= steps + else: + batches = make_batches(num_samples, batch_size) + index_array = np.arange(num_samples) + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + if isinstance(ins[-1], int): + # Do not slice the training phase flag. 
+ ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + else: + ins_batch = slice_arrays(ins, batch_ids) + for i in indices_for_conversion_to_dense: + ins_batch[i] = ins_batch[i].toarray() + + batch_outs = f(ins_batch) + + if isinstance(batch_outs, list): + if batch_index == 0: + for batch_out in enumerate(batch_outs): + outs.append(0.) + for i, batch_out in enumerate(batch_outs): + if i in stateful_metric_indices: + outs[i] = batch_out + else: + outs[i] += batch_out * len(batch_ids) + else: + if batch_index == 0: + outs.append(0.) + outs[0] += batch_outs * len(batch_ids) + if verbose == 1: + progbar.update(batch_end) + for i in range(len(outs)): + if i not in stateful_metric_indices: + outs[i] /= num_samples + if len(outs) == 1: + return outs[0] + return outs diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py index cdf189adef..75c96e6916 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py @@ -12,13 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Keras training and evaluation routines. +"""Keras training and evaluation routines for eager execution. 
""" # pylint: disable=protected-access from __future__ import absolute_import from __future__ import division from __future__ import print_function + +import copy + import numpy as np + from tensorflow.python.eager.backprop import GradientTape from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util @@ -26,6 +30,7 @@ from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras import callbacks as cbks from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module +from tensorflow.python.keras._impl.keras.engine import training_utils from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays @@ -99,15 +104,15 @@ def _eager_metrics_fn(model, outputs, targets): return metric_names, metric_results -def _model_loss(model, inputs, targets, training=False): +def _model_loss(model, inputs, targets, sample_weights=None, training=False): """Calculates the loss for a given model. Arguments: - model: The model on which metrics are being calculated. - inputs: The inputs of the given model. This is typically the mini batch of - data that is fed to the model. - targets: The predictions or targets of the given model. - training: Whether the model should be run in inference or training mode. + model: The model on which metrics are being calculated. + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. + training: Whether the model should be run in inference or training mode. 
Returns: Returns the model output, total loss and loss value calculated using the @@ -134,23 +139,20 @@ def _model_loss(model, inputs, targets, training=False): loss_metrics = [] with K.name_scope('loss'): for i, loss_fn in enumerate(model.loss_functions): - # compute the loss - output_loss = _eager_loss_fn(outs[i], targets[i], loss_fn, - model.output_names[i]) - loss_metrics.append(K.mean(output_loss)) + if sample_weights: + weights = sample_weights[i] + else: + weights = None # TODO(fchollet): support masking; in practice `_keras_mask` is never # set in this context currently. mask = outs[i]._keras_mask - # adapted from weighted_loss_fn - if mask is not None: - # mask should have the same shape as output_loss - output_loss *= mask - # the loss per batch should be proportional - # to the number of unmasked samples. - output_loss /= K.mean(mask) - # TODO(fchollet): support sample weighting + weighted_masked_fn = training_utils.weighted_masked_objective(loss_fn) + with K.name_scope(model.output_names[i] + '_loss'): + output_loss = weighted_masked_fn( + outs[i], targets[i], weights, mask=mask) + loss_metrics.append(K.mean(output_loss)) loss_weight = model.loss_weights_list[i] if total_loss is None: @@ -171,16 +173,20 @@ def _model_loss(model, inputs, targets, training=False): return outs, total_loss, loss_metrics -def _process_single_batch(eager_model_inputs, eager_model_outputs, model, +def _process_single_batch(model, + inputs, + targets, + sample_weights=None, training=False): """Calculate the loss and gradient for one input batch. The model weights are updated if training is set to True. Arguments: - eager_model_inputs: Input batch data. - eager_model_outputs: Output batch data. model: Model whose loss has to be calculated. + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. training: The boolean represents if the weights of the model are updated. 
'fit' methods will set this to True while 'evaluate' methods will set this to False. @@ -193,8 +199,8 @@ def _process_single_batch(eager_model_inputs, eager_model_outputs, model, """ K.set_learning_phase(training) with GradientTape() as tape: - outs, loss, loss_metrics = _model_loss(model, eager_model_inputs, - eager_model_outputs, + outs, loss, loss_metrics = _model_loss(model, inputs, targets, + sample_weights=sample_weights, training=training) if loss is None: raise ValueError('The model cannot be run ' @@ -211,62 +217,61 @@ def _process_single_batch(eager_model_inputs, eager_model_outputs, model, return outs, loss, loss_metrics -def train_on_batch(model, ins): +def train_on_batch(model, inputs, targets, sample_weights=None): """Calculates the loss and gradient updates for one input batch. Arguments: - model: Given model on which loss and gradients are calculated. - ins: Input and output batch numpy arrays. + model: Model whose loss has to be calculated. + inputs: Input batch data. + targets: Target batch data. + sample_weights: Sample weight batch data. Returns: total loss and the loss associated with each output. 
""" - ins_batch_converted = [] - for ib in ins: - if ib is not None: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) - eager_model_inputs = [] - eager_model_outputs = [] - for i in range(len(model.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) - for i in range(len(model.inputs), len(ins_batch_converted)): - eager_model_outputs.append(ins_batch_converted[i]) + inputs = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs] + targets = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets] + sample_weights = [ + ops.convert_to_tensor(val, dtype=K.floatx()) + if val is not None else None for val in sample_weights] outs, loss, _ = _process_single_batch( - eager_model_inputs, eager_model_outputs, model, training=True) + model, inputs, targets, sample_weights=sample_weights, training=True) if not isinstance(outs, list): outs = [outs] _, metrics_results = _eager_metrics_fn( - model, outs, eager_model_outputs) + model, outs, targets) if not isinstance(loss, list): loss = [loss] return loss + metrics_results -def test_on_batch(model, ins): +def test_on_batch(model, inputs, targets, sample_weights=None): """Calculates the loss for one input batch. Arguments: - model: Given model on which loss is calculated. - ins: Input and output batch numpy arrays. + model: Model whose loss has to be calculated. + inputs: Input batch data. + targets: Target batch data. + sample_weights: Sample weight batch data. Returns: total loss, loss and metrics associated with each output. 
""" - ins_batch_converted = [] - for ib in ins: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) - eager_model_inputs = [] - eager_model_outputs = [] - for i in range(len(model.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) - for i in range(len(model.inputs), len(ins_batch_converted)): - eager_model_outputs.append(ins_batch_converted[i]) + inputs = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs] + targets = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets] + sample_weights = [ + ops.convert_to_tensor(val, dtype=K.floatx()) + if val is not None else None for val in sample_weights] outs, loss, loss_metrics = _process_single_batch( - eager_model_inputs, eager_model_outputs, model, training=False) + model, inputs, targets, sample_weights=sample_weights, training=False) if not isinstance(outs, list): outs = [outs] metric_names, metrics_results = _eager_metrics_fn( - model, outs, eager_model_outputs) + model, outs, targets) model.metrics_names.append(metric_names) if not isinstance(loss, list): loss = [loss] @@ -275,32 +280,35 @@ def test_on_batch(model, ins): def fit_loop( model, - ins, - out_labels=None, + inputs, + targets, + sample_weights=None, + val_inputs=None, + val_targets=None, + val_sample_weights=None, batch_size=None, epochs=100, verbose=1, callbacks=None, - val_ins=None, shuffle=True, callback_metrics=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None): - """Abstract fit function for `f(ins)`. - - Assume that f returns a list, labeled by out_labels. + """Abstract fit function for eager execution. Arguments: model: Instance of the model that is being executed in Eager mode. - ins: List of tensors to be fed to `f` - out_labels: List of strings, display names of - the outputs of `f` + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. + val_inputs: Input data for validation. 
+ val_targets: Target data for validation. + val_sample_weights: Sample weight data for validation. batch_size: Integer batch size or None if unknown. epochs: Number of times to iterate over the data verbose: Verbosity mode, 0, 1 or 2 callbacks: List of callbacks to be called during training - val_ins: List of tensors to be fed to `val_f` shuffle: Whether to shuffle the data at the beginning of each epoch callback_metrics: List of strings, the display names of the metrics passed to the callbacks. They should be the @@ -324,20 +332,35 @@ def fit_loop( K.set_learning_phase(True) do_validation = False - if val_ins: + if val_inputs: do_validation = True - if (verbose and ins and hasattr(ins[0], 'shape') and - hasattr(val_ins[0], 'shape')): + if (verbose and inputs and hasattr(inputs[0], 'shape') and + hasattr(val_inputs[0], 'shape')): print('Train on %d samples, validate on %d samples' % - (ins[0].shape[0], val_ins[0].shape[0])) + (inputs[0].shape[0], val_inputs[0].shape[0])) if validation_steps: if steps_per_epoch is None: raise ValueError('Can only use `validation_steps` when doing step-wise ' 'training, i.e. 
`steps_per_epoch` must be set.') do_validation = True - num_train_samples = model._check_num_samples( - ins, batch_size, steps_per_epoch, 'steps_per_epoch') + out_labels = model.metrics_names + if do_validation: + callback_metrics = copy.copy(out_labels) + [ + 'val_' + n for n in out_labels + ] + else: + callback_metrics = copy.copy(out_labels) + + if sample_weights: + feed_data = inputs + targets + sample_weights + else: + feed_data = inputs + targets + num_train_samples = training_utils.check_num_samples( + feed_data, + batch_size=batch_size, + steps=steps_per_epoch, + steps_name='steps_per_epoch') if num_train_samples is not None: index_array = np.arange(num_train_samples) @@ -351,7 +374,6 @@ def fit_loop( count_mode = 'samples' callbacks += [cbks.ProgbarLogger(count_mode)] callbacks = cbks.CallbackList(callbacks) - out_labels = out_labels or [] # it's possible to callback a different model than self # (used by Sequential models) @@ -374,7 +396,12 @@ def fit_loop( callbacks.on_train_begin() callback_model.stop_training = False for cbk in callbacks: - cbk.validation_data = val_ins + if not val_inputs: + cbk.validation_data = [] + elif val_sample_weights: + cbk.validation_data = val_inputs + val_targets + val_sample_weights + else: + cbk.validation_data = val_inputs + val_targets for epoch in range(initial_epoch, epochs): callbacks.on_epoch_begin(epoch) @@ -389,11 +416,12 @@ def fit_loop( for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] try: - if isinstance(ins[-1], float): - # Do not slice the training phase flag. - ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + inputs_batch = slice_arrays(inputs, batch_ids) + targets_batch = slice_arrays(targets, batch_ids) + if sample_weights: + sample_weights_batch = slice_arrays(sample_weights, batch_ids) else: - ins_batch = slice_arrays(ins, batch_ids) + sample_weights_batch = None except TypeError: raise TypeError('TypeError while preparing batch. 
' 'If using HDF5 input data, ' @@ -404,21 +432,22 @@ def fit_loop( callbacks.on_batch_begin(batch_index, batch_logs) - ins_batch_converted = [] - for ib in ins_batch: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) - eager_model_inputs = [] - eager_model_outputs = [] - for i in range(len(model.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) - - for i in range(len(model.inputs), len(ins_batch_converted)): - eager_model_outputs.append(ins_batch_converted[i]) - - outs, loss, loss_metrics = _process_single_batch(eager_model_inputs, - eager_model_outputs, - model, - training=True) + inputs_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs_batch] + targets_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets_batch] + if sample_weights: + sample_weights_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) + if val is not None else None + for val in sample_weights_batch] + + outs, loss, loss_metrics = _process_single_batch( + model, + inputs_batch, + targets_batch, + sample_weights=sample_weights_batch, + training=True) if not isinstance(outs, list): outs = [outs] @@ -426,8 +455,8 @@ def fit_loop( for l, o in zip(out_labels, outs): batch_logs[l] = o # Required for Eager mode - metrics_names, metrics_results = _eager_metrics_fn(model, outs, - eager_model_outputs) + metrics_names, metrics_results = _eager_metrics_fn( + model, outs, targets_batch) batch_logs['loss'] = tensor_util.constant_value(K.mean(loss)) # TODO(anjalisridhar): Move this to compile to avoid duplicate code. @@ -461,7 +490,10 @@ def fit_loop( if batch_index == len(batches) - 1: # Last batch. if do_validation: val_outs = test_loop( - model, val_ins, batch_size=batch_size, verbose=0) + model, val_inputs, val_targets, + sample_weights=val_sample_weights, + batch_size=batch_size, + verbose=0) if not isinstance(val_outs, list): val_outs = [val_outs] # Same labels assumed. 
@@ -474,12 +506,18 @@ def fit_loop( return model.history -def test_loop(model, ins, batch_size=None, verbose=0, steps=None): +def test_loop(model, inputs, targets, + sample_weights=None, + batch_size=None, + verbose=0, + steps=None): """Abstract method to loop over some data in batches. Arguments: model: Model instance that is being evaluated in Eager mode. - ins: list of tensors to be fed to `f`. + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. batch_size: integer batch size or `None`. verbose: verbosity mode. steps: Total number of steps (batches of samples) @@ -493,7 +531,11 @@ def test_loop(model, ins, batch_size=None, verbose=0, steps=None): the display labels for the scalar outputs. """ K.set_learning_phase(False) - num_samples = model._check_num_samples(ins, batch_size, steps, 'steps') + feed_data = inputs + targets + if sample_weights: + feed_data += sample_weights + num_samples = training_utils.check_num_samples( + feed_data, batch_size=batch_size, steps=steps, steps_name='steps') outs = [] if verbose == 1: progbar = Progbar(target=num_samples) @@ -501,29 +543,30 @@ def test_loop(model, ins, batch_size=None, verbose=0, steps=None): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if isinstance(ins[-1], float): - # Do not slice the training phase flag. 
- ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + inputs_batch = slice_arrays(inputs, batch_ids) + targets_batch = slice_arrays(targets, batch_ids) + if sample_weights: + sample_weights_batch = slice_arrays(sample_weights, batch_ids) else: - ins_batch = slice_arrays(ins, batch_ids) - - ins_batch_converted = [] - for ib in ins_batch: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) - - eager_model_inputs = [] - eager_model_outputs = [] - for i in range(len(model.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) - - for i in range(len(model.inputs), len(ins_batch_converted)): - eager_model_outputs.append(ins_batch_converted[i]) - - loss_outs, loss, loss_metrics = _model_loss(model, eager_model_inputs, - eager_model_outputs, - training=False) - _, metrics_results = _eager_metrics_fn(model, loss_outs, - eager_model_outputs) + sample_weights_batch = None + + inputs_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs_batch] + targets_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets_batch] + if sample_weights: + sample_weights_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) + if val is not None else None + for val in sample_weights_batch] + + loss_outs, loss, loss_metrics = _model_loss( + model, + inputs_batch, + targets_batch, + sample_weights=sample_weights_batch, + training=False) + _, metrics_results = _eager_metrics_fn(model, loss_outs, targets_batch) batch_outs = [] for _, v in zip(model.metrics_names, [K.mean(loss)] + loss_metrics + metrics_results): @@ -549,12 +592,15 @@ def test_loop(model, ins, batch_size=None, verbose=0, steps=None): return outs -def predict_loop(model, ins, batch_size=32, verbose=0, steps=None): +def predict_loop(model, inputs, + batch_size=32, + verbose=0, + steps=None): """Abstract method to loop over some data in batches. Arguments: model: - ins: list of tensors to be fed to `f`. + inputs: List of input arrays. 
batch_size: integer batch size. verbose: verbosity mode. steps: Total number of steps (batches of samples) @@ -567,7 +613,8 @@ def predict_loop(model, ins, batch_size=32, verbose=0, steps=None): (if the model has multiple outputs). """ K.set_learning_phase(False) - num_samples = model._check_num_samples(ins, batch_size, steps, 'steps') + num_samples = training_utils.check_num_samples( + inputs, batch_size, steps, 'steps') if verbose == 1: if steps is not None: progbar = Progbar(target=steps) @@ -579,30 +626,21 @@ def predict_loop(model, ins, batch_size=32, verbose=0, steps=None): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if ins and isinstance(ins[-1], float): - # Do not slice the training phase flag. - ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = slice_arrays(ins, batch_ids) - - ins_batch_converted = [] - for ib in ins_batch: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) + inputs_batch = slice_arrays(inputs, batch_ids) - eager_model_inputs = [] - for i in range(len(model.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) + inputs_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs_batch] - if len(eager_model_inputs) == 1: + if len(inputs_batch) == 1: if model._expects_training_arg: - batch_outs = model.call(eager_model_inputs[0], training=False) + batch_outs = model.call(inputs_batch[0], training=False) else: - batch_outs = model.call(eager_model_inputs[0]) + batch_outs = model.call(inputs_batch[0]) else: if model._expects_training_arg: - batch_outs = model.call(eager_model_inputs, training=False) + batch_outs = model.call(inputs_batch, training=False) else: - batch_outs = model.call(eager_model_inputs) + batch_outs = model.call(inputs_batch) if not isinstance(batch_outs, list): batch_outs = [batch_outs] diff --git 
a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py index 550b86a71d..8848b393d5 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py @@ -309,6 +309,229 @@ class TrainingTest(test.TestCase): optimizer='rms') +class LossWeightingTest(test.TestCase): + + def test_class_weights(self): + num_classes = 5 + batch_size = 5 + weighted_class = 3 + train_samples = 300 + test_samples = 300 + input_dim = 5 + + model = keras.models.Sequential() + model.add(keras.layers.Dense(10, input_shape=(input_dim,))) + model.add(keras.layers.Activation('relu')) + model.add(keras.layers.Dense(num_classes)) + model.add(keras.layers.Activation('softmax')) + model.compile(loss='categorical_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001)) + + np.random.seed(1337) + (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( + train_samples=train_samples, + test_samples=test_samples, + input_shape=(input_dim,), + num_classes=num_classes) + int_y_test = y_test.copy() + int_y_train = y_train.copy() + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + test_ids = np.where(int_y_test == np.array(weighted_class))[0] + + class_weight = dict([(i, 1.) for i in range(num_classes)]) + class_weight[weighted_class] = 4. + + sample_weight = np.ones((y_train.shape[0])) + sample_weight[int_y_train == weighted_class] = 4. 
+ + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=2, + verbose=0, + class_weight=class_weight, + validation_data=(x_train, y_train, sample_weight)) + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=2, + verbose=0, + class_weight=class_weight) + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=2, + verbose=0, + class_weight=class_weight, + validation_split=0.1) + + model.train_on_batch( + x_train[:batch_size], y_train[:batch_size], class_weight=class_weight) + ref_score = model.evaluate(x_test, y_test, verbose=0) + score = model.evaluate( + x_test[test_ids, :], y_test[test_ids, :], verbose=0) + self.assertLess(score, ref_score) + + def test_sample_weights(self): + num_classes = 5 + batch_size = 5 + weighted_class = 3 + train_samples = 300 + test_samples = 300 + input_dim = 5 + + model = keras.models.Sequential() + model.add(keras.layers.Dense(10, input_shape=(input_dim,))) + model.add(keras.layers.Activation('relu')) + model.add(keras.layers.Dense(num_classes)) + model.add(keras.layers.Activation('softmax')) + model.compile(loss='categorical_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001)) + + np.random.seed(43) + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=train_samples, + test_samples=test_samples, + input_shape=(input_dim,), + num_classes=num_classes) + int_y_train = y_train.copy() + y_train = keras.utils.to_categorical(y_train, num_classes) + + class_weight = dict([(i, 1.) for i in range(num_classes)]) + class_weight[weighted_class] = 4. + + sample_weight = np.ones((y_train.shape[0])) + sample_weight[int_y_train == weighted_class] = 4. 
+ + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=2, + verbose=0, + sample_weight=sample_weight) + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=2, + verbose=0, + sample_weight=sample_weight, + validation_split=0.1) + model.train_on_batch( + x_train[:batch_size], + y_train[:batch_size], + sample_weight=sample_weight[:batch_size]) + model.test_on_batch( + x_train[:batch_size], + y_train[:batch_size], + sample_weight=sample_weight[:batch_size]) + + def test_temporal_sample_weights(self): + num_classes = 5 + weighted_class = 3 + train_samples = 1000 + test_samples = 1000 + input_dim = 5 + timesteps = 3 + + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Dense(num_classes), + input_shape=(timesteps, input_dim))) + model.add(keras.layers.Activation('softmax')) + + np.random.seed(1337) + (_, y_train), _ = testing_utils.get_test_data( + train_samples=train_samples, + test_samples=test_samples, + input_shape=(input_dim,), + num_classes=num_classes) + int_y_train = y_train.copy() + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + + class_weight = dict([(i, 1.) for i in range(num_classes)]) + class_weight[weighted_class] = 2. + + sample_weight = np.ones((y_train.shape[0])) + sample_weight[int_y_train == weighted_class] = 2. 
+ with self.assertRaises(ValueError): + model.compile( + loss='binary_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001), + sample_weight_mode='temporal') + + def test_class_weight_invalid_use_case(self): + num_classes = 5 + train_samples = 1000 + test_samples = 1000 + input_dim = 5 + timesteps = 3 + + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Dense(num_classes), + input_shape=(timesteps, input_dim))) + model.add(keras.layers.Activation('softmax')) + model.compile( + loss='binary_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001)) + + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=train_samples, + test_samples=test_samples, + input_shape=(input_dim,), + num_classes=num_classes) + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + class_weight = dict([(i, 1.) for i in range(num_classes)]) + + del class_weight[1] + with self.assertRaises(ValueError): + model.fit(x_train, y_train, + epochs=0, verbose=0, class_weight=class_weight) + + with self.assertRaises(ValueError): + model.compile( + loss='binary_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001), + sample_weight_mode=[]) + + # Build multi-output model + x = keras.Input((3,)) + y1 = keras.layers.Dense(4, name='1')(x) + y2 = keras.layers.Dense(4, name='2')(x) + model = keras.models.Model(x, [y1, y2]) + model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001), loss='mse') + x_np = np.random.random((10, 3)) + y_np = np.random.random((10, 4)) + w_np = np.random.random((10,)) + # This will work + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': w_np}) + # These will not + with self.assertRaises(ValueError): + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=[w_np]) + with self.assertRaises(TypeError): + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=w_np) + with self.assertRaises(ValueError): + bad_w_np = 
np.random.random((11,)) + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) + with self.assertRaises(ValueError): + bad_w_np = np.random.random((10, 2)) + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) + with self.assertRaises(ValueError): + bad_w_np = np.random.random((10, 2, 2)) + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) + + if __name__ == '__main__': # Bazel sets these environment variables to very long paths. # Tempfile uses them to create long paths, and in turn multiprocessing diff --git a/tensorflow/python/keras/_impl/keras/engine/training_generator.py b/tensorflow/python/keras/_impl/keras/engine/training_generator.py new file mode 100644 index 0000000000..4af62c85d5 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/training_generator.py @@ -0,0 +1,439 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Part of the Keras training engine related to Python generators of array data. 
+""" +# pylint: disable=protected-access +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import callbacks as cbks +from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer +from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer +from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence +from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar +from tensorflow.python.platform import tf_logging as logging + + +def fit_generator(model, + generator, + steps_per_epoch=None, + epochs=1, + verbose=1, + callbacks=None, + validation_data=None, + validation_steps=None, + class_weight=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + shuffle=True, + initial_epoch=0): + """See docstring for `Model.fit_generator`.""" + wait_time = 0.01 # in seconds + epoch = initial_epoch + + do_validation = bool(validation_data) + model._make_train_function() + if do_validation: + model._make_test_function() + + is_sequence = isinstance(generator, Sequence) + if not is_sequence and use_multiprocessing and workers > 1: + logging.warning( + UserWarning('Using a generator with `use_multiprocessing=True`' + ' and multiple workers may duplicate your data.' + ' Please consider using the`keras.utils.Sequence' + ' class.')) + if steps_per_epoch is None: + if is_sequence: + steps_per_epoch = len(generator) + else: + raise ValueError('`steps_per_epoch=None` is only valid for a' + ' generator based on the `keras.utils.Sequence`' + ' class. 
Please specify `steps_per_epoch` or use' + ' the `keras.utils.Sequence` class.') + + # python 2 has 'next', 3 has '__next__' + # avoid any explicit version checks + val_gen = ( + hasattr(validation_data, 'next') or + hasattr(validation_data, '__next__') or + isinstance(validation_data, Sequence)) + if (val_gen and not isinstance(validation_data, Sequence) and + not validation_steps): + raise ValueError('`validation_steps=None` is only valid for a' + ' generator based on the `keras.utils.Sequence`' + ' class. Please specify `validation_steps` or use' + ' the `keras.utils.Sequence` class.') + + # Prepare display labels. + out_labels = model.metrics_names + callback_metrics = out_labels + ['val_%s' % n for n in out_labels] + + # prepare callbacks + model.history = cbks.History() + callbacks = [cbks.BaseLogger()] + (callbacks or []) + [model.history] + if verbose: + callbacks += [cbks.ProgbarLogger(count_mode='steps')] + callbacks = cbks.CallbackList(callbacks) + + # it's possible to callback a different model than self: + if hasattr(model, 'callback_model') and model.callback_model: + callback_model = model.callback_model + else: + callback_model = model + callbacks.set_model(callback_model) + callbacks.set_params({ + 'epochs': epochs, + 'steps': steps_per_epoch, + 'verbose': verbose, + 'do_validation': do_validation, + 'metrics': callback_metrics, + }) + callbacks.on_train_begin() + + enqueuer = None + val_enqueuer = None + + try: + if do_validation: + if val_gen: + if workers > 0: + if isinstance(validation_data, Sequence): + val_enqueuer = OrderedEnqueuer( + validation_data, use_multiprocessing=use_multiprocessing) + if validation_steps is None: + validation_steps = len(validation_data) + else: + val_enqueuer = GeneratorEnqueuer( + validation_data, + use_multiprocessing=use_multiprocessing, + wait_time=wait_time) + val_enqueuer.start(workers=workers, max_queue_size=max_queue_size) + validation_generator = val_enqueuer.get() + else: + validation_generator = 
validation_data + else: + if len(validation_data) == 2: + val_x, val_y = validation_data # pylint: disable=unpacking-non-sequence + val_sample_weight = None + elif len(validation_data) == 3: + val_x, val_y, val_sample_weight = validation_data # pylint: disable=unpacking-non-sequence + else: + raise ValueError( + '`validation_data` should be a tuple ' + '`(val_x, val_y, val_sample_weight)` ' + 'or `(val_x, val_y)`. Found: ' + str(validation_data)) + val_x, val_y, val_sample_weights = model._standardize_user_data( + val_x, val_y, val_sample_weight) + val_data = val_x + val_y + val_sample_weights + if model.uses_learning_phase and not isinstance( + K.learning_phase(), int): + val_data += [0] + for cbk in callbacks: + cbk.validation_data = val_data + + if workers > 0: + if is_sequence: + enqueuer = OrderedEnqueuer( + generator, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle) + else: + enqueuer = GeneratorEnqueuer( + generator, + use_multiprocessing=use_multiprocessing, + wait_time=wait_time) + enqueuer.start(workers=workers, max_queue_size=max_queue_size) + output_generator = enqueuer.get() + else: + output_generator = generator + + callback_model.stop_training = False + # Construct epoch logs. + epoch_logs = {} + while epoch < epochs: + callbacks.on_epoch_begin(epoch) + steps_done = 0 + batch_index = 0 + while steps_done < steps_per_epoch: + generator_output = next(output_generator) + + if not hasattr(generator_output, '__len__'): + raise ValueError('Output of generator should be ' + 'a tuple `(x, y, sample_weight)` ' + 'or `(x, y)`. Found: ' + str(generator_output)) + + if len(generator_output) == 2: + x, y = generator_output + sample_weight = None + elif len(generator_output) == 3: + x, y, sample_weight = generator_output + else: + raise ValueError('Output of generator should be ' + 'a tuple `(x, y, sample_weight)` ' + 'or `(x, y)`. 
Found: ' + str(generator_output)) + # build batch logs + batch_logs = {} + if isinstance(x, list): + batch_size = x[0].shape[0] + elif isinstance(x, dict): + batch_size = list(x.values())[0].shape[0] + else: + batch_size = x.shape[0] + batch_logs['batch'] = batch_index + batch_logs['size'] = batch_size + callbacks.on_batch_begin(batch_index, batch_logs) + + outs = model.train_on_batch( + x, y, sample_weight=sample_weight, class_weight=class_weight) + + if not isinstance(outs, list): + outs = [outs] + for l, o in zip(out_labels, outs): + batch_logs[l] = o + + callbacks.on_batch_end(batch_index, batch_logs) + + batch_index += 1 + steps_done += 1 + + # Epoch finished. + if steps_done >= steps_per_epoch and do_validation: + if val_gen: + val_outs = evaluate_generator( + model, validation_generator, validation_steps, workers=0) + else: + # No need for try/except because + # data has already been validated. + val_outs = model.evaluate( + val_x, + val_y, + batch_size=batch_size, + sample_weight=val_sample_weights, + verbose=0) + if not isinstance(val_outs, list): + val_outs = [val_outs] + # Same labels assumed. 
+ for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o + + if callback_model.stop_training: + break + + callbacks.on_epoch_end(epoch, epoch_logs) + epoch += 1 + if callback_model.stop_training: + break + + finally: + try: + if enqueuer is not None: + enqueuer.stop() + finally: + if val_enqueuer is not None: + val_enqueuer.stop() + + callbacks.on_train_end() + return model.history + + +def evaluate_generator(model, + generator, + steps=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False): + """See docstring for `Model.evaluate_generator`.""" + model._make_test_function() + + steps_done = 0 + wait_time = 0.01 + all_outs = [] + batch_sizes = [] + is_sequence = isinstance(generator, Sequence) + if not is_sequence and use_multiprocessing and workers > 1: + logging.warning( + UserWarning('Using a generator with `use_multiprocessing=True`' + ' and multiple workers may duplicate your data.' + ' Please consider using the`keras.utils.Sequence' + ' class.')) + if steps is None: + if is_sequence: + steps = len(generator) + else: + raise ValueError('`steps=None` is only valid for a generator' + ' based on the `keras.utils.Sequence` class.' + ' Please specify `steps` or use the' + ' `keras.utils.Sequence` class.') + enqueuer = None + + try: + if workers > 0: + if is_sequence: + enqueuer = OrderedEnqueuer( + generator, use_multiprocessing=use_multiprocessing) + else: + enqueuer = GeneratorEnqueuer( + generator, + use_multiprocessing=use_multiprocessing, + wait_time=wait_time) + enqueuer.start(workers=workers, max_queue_size=max_queue_size) + output_generator = enqueuer.get() + else: + output_generator = generator + + while steps_done < steps: + generator_output = next(output_generator) + if not hasattr(generator_output, '__len__'): + raise ValueError('Output of generator should be a tuple ' + '(x, y, sample_weight) ' + 'or (x, y). 
Found: ' + str(generator_output)) + if len(generator_output) == 2: + x, y = generator_output + sample_weight = None + elif len(generator_output) == 3: + x, y, sample_weight = generator_output + else: + raise ValueError('Output of generator should be a tuple ' + '(x, y, sample_weight) ' + 'or (x, y). Found: ' + str(generator_output)) + outs = model.test_on_batch(x, y, sample_weight=sample_weight) + + if isinstance(x, list): + batch_size = x[0].shape[0] + elif isinstance(x, dict): + batch_size = list(x.values())[0].shape[0] + else: + batch_size = x.shape[0] + if batch_size == 0: + raise ValueError('Received an empty batch. ' + 'Batches should at least contain one item.') + all_outs.append(outs) + + steps_done += 1 + batch_sizes.append(batch_size) + + finally: + if enqueuer is not None: + enqueuer.stop() + + if not isinstance(outs, list): + return np.average(np.asarray(all_outs), weights=batch_sizes) + else: + averages = [] + for i in range(len(outs)): + averages.append( + np.average([out[i] for out in all_outs], weights=batch_sizes)) + return averages + + +def predict_generator(model, + generator, + steps=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + verbose=0): + """See docstring for `Model.predict_generator`.""" + model._make_predict_function() + + steps_done = 0 + wait_time = 0.01 + all_outs = [] + is_sequence = isinstance(generator, Sequence) + if not is_sequence and use_multiprocessing and workers > 1: + logging.warning( + UserWarning('Using a generator with `use_multiprocessing=True`' + ' and multiple workers may duplicate your data.' + ' Please consider using the`keras.utils.Sequence' + ' class.')) + if steps is None: + if is_sequence: + steps = len(generator) + else: + raise ValueError('`steps=None` is only valid for a generator' + ' based on the `keras.utils.Sequence` class.' 
+ ' Please specify `steps` or use the' + ' `keras.utils.Sequence` class.') + enqueuer = None + + try: + if workers > 0: + if is_sequence: + enqueuer = OrderedEnqueuer( + generator, use_multiprocessing=use_multiprocessing) + else: + enqueuer = GeneratorEnqueuer( + generator, + use_multiprocessing=use_multiprocessing, + wait_time=wait_time) + enqueuer.start(workers=workers, max_queue_size=max_queue_size) + output_generator = enqueuer.get() + else: + output_generator = generator + + if verbose == 1: + progbar = Progbar(target=steps) + + while steps_done < steps: + generator_output = next(output_generator) + if isinstance(generator_output, tuple): + # Compatibility with the generators + # used for training. + if len(generator_output) == 2: + x, _ = generator_output + elif len(generator_output) == 3: + x, _, _ = generator_output + else: + raise ValueError('Output of generator should be ' + 'a tuple `(x, y, sample_weight)` ' + 'or `(x, y)`. Found: ' + str(generator_output)) + else: + # Assumes a generator that only + # yields inputs (not targets and sample weights). 
+ x = generator_output + + outs = model.predict_on_batch(x) + if not isinstance(outs, list): + outs = [outs] + + if not all_outs: + for out in outs: + all_outs.append([]) + + for i, out in enumerate(outs): + all_outs[i].append(out) + steps_done += 1 + if verbose == 1: + progbar.update(steps_done) + + finally: + if enqueuer is not None: + enqueuer.stop() + + if len(all_outs) == 1: + if steps_done == 1: + return all_outs[0][0] + else: + return np.concatenate(all_outs[0]) + if steps_done == 1: + return [out[0] for out in all_outs] + else: + return [np.concatenate(out) for out in all_outs] diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index 6ca5941e9a..38ba0f0eae 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -25,7 +25,7 @@ import numpy as np from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils -from tensorflow.python.keras._impl.keras.engine.training import _weighted_masked_objective +from tensorflow.python.keras._impl.keras.engine.training_utils import weighted_masked_objective from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.platform import test @@ -705,7 +705,7 @@ class LossMaskingTest(test.TestCase): def test_loss_masking(self): with self.test_session(): - weighted_loss = _weighted_masked_objective(keras.losses.get('mae')) + weighted_loss = weighted_masked_objective(keras.losses.get('mae')) shape = (3, 4, 2) x = np.arange(24).reshape(shape) y = 2 * x @@ -1037,16 +1037,16 @@ class TestGeneratorMethods(test.TestCase): class TestTrainingUtils(test.TestCase): def test_check_array_lengths(self): - keras.engine.training._check_array_lengths(None, None, None) + keras.engine.training_utils.check_array_lengths(None, None, None) a_np = np.random.random((4, 3, 3)) - 
keras.engine.training._check_array_lengths(a_np, a_np, a_np) - keras.engine.training._check_array_lengths( + keras.engine.training_utils.check_array_lengths(a_np, a_np, a_np) + keras.engine.training_utils.check_array_lengths( [a_np, a_np], [a_np, a_np], [a_np, a_np]) - keras.engine.training._check_array_lengths([None], [None], [None]) + keras.engine.training_utils.check_array_lengths([None], [None], [None]) b_np = np.random.random((3, 4)) with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [b_np], None) + keras.engine.training_utils.check_array_lengths([a_np], [b_np], None) def test_slice_arrays(self): input_a = np.random.random((10, 3)) diff --git a/tensorflow/python/keras/_impl/keras/engine/training_utils.py b/tensorflow/python/keras/_impl/keras/engine/training_utils.py new file mode 100644 index 0000000000..105638ce10 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/training_utils.py @@ -0,0 +1,534 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Training-related utilities. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy + +import numpy as np + +from tensorflow.python.framework import tensor_util +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import losses + + +def check_num_samples(ins, + batch_size=None, + steps=None, + steps_name='steps'): + """Determine the number of samples provided for training and evaluation. + + The number of samples is not defined when running with `steps`, + in which case the number of samples is set to `None`. + + Arguments: + ins: List of tensors to be fed to the Keras function. + batch_size: Integer batch size or `None` if not defined. + steps: Total number of steps (batches of samples) + before declaring `_predict_loop` finished. + Ignored with the default value of `None`. + steps_name: The public API's parameter name for `steps`. + + Raises: + ValueError: when `steps` is `None` and the attribute `ins.shape` + does not exist. Also raises ValueError when `steps` is not `None` + and `batch_size` is not `None` because they are mutually + exclusive. + + Returns: + When steps is `None`, returns the number of samples to be + processed based on the size of the first dimension of the + first input numpy array. When steps is not `None` and + `batch_size` is `None`, returns `None`. + + Raises: + ValueError: In case of invalid arguments. 
+ """ + if steps is not None: + num_samples = None + if batch_size is not None: + raise ValueError( + 'If ' + steps_name + ' is set, the `batch_size` must be None.') + elif ins and hasattr(ins[0], 'shape'): + num_samples = ins[0].shape[0] + else: + raise ValueError( + 'Either the input data should have ' + 'a defined shape, or ' + steps_name + ' should be specified.') + return num_samples + + +def standardize_input_data(data, + names, + shapes=None, + check_batch_axis=True, + exception_prefix=''): + """Normalizes inputs and targets provided by users. + + Users may pass data as a list of arrays, dictionary of arrays, + or as a single array. We normalize this to an ordered list of + arrays (same order as `names`), while checking that the provided + arrays have shapes that match the network's expectations. + + Arguments: + data: User-provided input data (polymorphic). + names: List of expected array names. + shapes: Optional list of expected array shapes. + check_batch_axis: Boolean; whether to check that + the batch axis of the arrays matches the expected + value found in `shapes`. + exception_prefix: String prefix used for exception formatting. + + Returns: + List of standardized input arrays (one array per model input). + + Raises: + ValueError: in case of improperly formatted user-provided data. + """ + if not names: + if data is not None and hasattr(data, '__len__') and len(data): + raise ValueError('Error when checking model ' + exception_prefix + ': ' + 'expected no data, but got:', data) + return [] + if data is None: + return [None for _ in range(len(names))] + + if isinstance(data, dict): + try: + data = [ + data[x].values + if data[x].__class__.__name__ == 'DataFrame' else data[x] + for x in names + ] + except KeyError as e: + raise ValueError('No data provided for "' + e.args[0] + '". 
Need data ' + 'for each key in: ' + str(names)) + elif isinstance(data, list): + if isinstance(data[0], list): + data = [np.asarray(d) for d in data] + elif len(names) == 1 and isinstance(data[0], (float, int)): + data = [np.asarray(data)] + else: + data = [ + x.values if x.__class__.__name__ == 'DataFrame' else x for x in data + ] + else: + data = data.values if data.__class__.__name__ == 'DataFrame' else data + data = [data] + data = [ + np.expand_dims(x, 1) if x is not None and x.ndim == 1 else x for x in data + ] + + if len(data) != len(names): + if data and hasattr(data[0], 'shape'): + raise ValueError('Error when checking model ' + exception_prefix + + ': the list of Numpy arrays that you are passing to ' + 'your model is not the size the model expected. ' + 'Expected to see ' + str(len(names)) + ' array(s), ' + 'but instead got the following list of ' + + str(len(data)) + ' arrays: ' + str(data)[:200] + '...') + elif len(names) > 1: + raise ValueError( + 'Error when checking model ' + exception_prefix + + ': you are passing a list as input to your model, ' + 'but the model expects a list of ' + str(len(names)) + + ' Numpy arrays instead. The list you passed was: ' + str(data)[:200]) + elif len(data) == 1 and not hasattr(data[0], 'shape'): + raise TypeError('Error when checking model ' + exception_prefix + + ': data should be a Numpy array, or list/dict of ' + 'Numpy arrays. Found: ' + str(data)[:200] + '...') + elif len(names) == 1: + data = [np.asarray(data)] + + # Check shapes compatibility. 
+ if shapes: + for i in range(len(names)): + if shapes[i] is not None: + data_shape = data[i].shape + shape = shapes[i] + if data[i].ndim != len(shape): + raise ValueError('Error when checking ' + exception_prefix + + ': expected ' + names[i] + ' to have ' + + str(len(shape)) + ' dimensions, but got array ' + 'with shape ' + str(data_shape)) + if not check_batch_axis: + data_shape = data_shape[1:] + shape = shape[1:] + for dim, ref_dim in zip(data_shape, shape): + if ref_dim != dim and ref_dim: + raise ValueError( + 'Error when checking ' + exception_prefix + ': expected ' + + names[i] + ' to have shape ' + str(shape) + + ' but got array with shape ' + str(data_shape)) + return data + + +def standardize_sample_or_class_weights(x_weight, output_names, weight_type): + """Maps `sample_weight` or `class_weight` to model outputs. + + Arguments: + x_weight: User-provided `sample_weight` or `class_weight` argument. + output_names: List of output names (strings) in the model. + weight_type: A string used purely for exception printing. + + Returns: + A list of `sample_weight` or `class_weight` where there are exactly + one element per model output. + + Raises: + ValueError: In case of invalid user-provided argument. + """ + if x_weight is None or len(x_weight) == 0: # pylint: disable=g-explicit-length-test + return [None for _ in output_names] + if len(output_names) == 1: + if isinstance(x_weight, list) and len(x_weight) == 1: + return x_weight + if isinstance(x_weight, dict) and output_names[0] in x_weight: + return [x_weight[output_names[0]]] + else: + return [x_weight] + if isinstance(x_weight, list): + if len(x_weight) != len(output_names): + raise ValueError('Provided `' + weight_type + '` was a list of ' + + str(len(x_weight)) + ' elements, but the model has ' + + str(len(output_names)) + ' outputs. 
' + 'You should provide one `' + weight_type + '`' + 'array per model output.') + return x_weight + if isinstance(x_weight, dict): + x_weights = [] + for name in output_names: + x_weights.append(x_weight.get(name)) + return x_weights + else: + raise TypeError( + 'The model has multiple outputs, so `' + weight_type + '` ' + 'should be either a list or a dict. ' + 'Provided `' + weight_type + '` type not understood: ' + str(x_weight)) + + +def standardize_class_weights(class_weight, output_names): + return standardize_sample_or_class_weights(class_weight, output_names, + 'class_weight') + + +def standardize_sample_weights(sample_weight, output_names): + return standardize_sample_or_class_weights(sample_weight, output_names, + 'sample_weight') + + +def check_array_lengths(inputs, targets, weights=None): + """Does user input validation for numpy arrays. + + Arguments: + inputs: list of Numpy arrays of inputs. + targets: list of Numpy arrays of targets. + weights: list of Numpy arrays of sample weights. + + Raises: + ValueError: in case of incorrectly formatted data. + """ + + def set_of_lengths(x): + # return a set with the variation between + # different shapes, with None => 0 + if x is None: + return {} + else: + return set([y.shape[0] for y in x if y is not None]) + + set_x = set_of_lengths(inputs) + set_y = set_of_lengths(targets) + set_w = set_of_lengths(weights) + if len(set_x) > 1: + raise ValueError('All input arrays (x) should have ' + 'the same number of samples. Got array shapes: ' + + str([x.shape for x in inputs])) + if len(set_y) > 1: + raise ValueError('All target arrays (y) should have ' + 'the same number of samples. Got array shapes: ' + + str([y.shape for y in targets])) + if set_x and set_y and list(set_x)[0] != list(set_y)[0]: + raise ValueError('Input arrays should have ' + 'the same number of samples as target arrays. 
' + 'Found ' + str(list(set_x)[0]) + ' input samples ' + 'and ' + str(list(set_y)[0]) + ' target samples.') + if len(set_w) > 1: + raise ValueError('All sample_weight arrays should have ' + 'the same number of samples. Got array shapes: ' + + str([w.shape for w in weights])) + if set_y and set_w and list(set_y)[0] != list(set_w)[0]: + raise ValueError('Sample_weight arrays should have ' + 'the same number of samples as target arrays. Got ' + + str(list(set_y)[0]) + ' input samples and ' + + str(list(set_w)[0]) + ' target samples.') + + +def check_loss_and_target_compatibility(targets, loss_fns, output_shapes): + """Does validation on the compatibility of targets and loss functions. + + This helps prevent users from using loss functions incorrectly. This check + is purely for UX purposes. + + Arguments: + targets: list of Numpy arrays of targets. + loss_fns: list of loss functions. + output_shapes: list of shapes of model outputs. + + Raises: + ValueError: if a loss function or target array + is incompatible with an output. + """ + key_losses = { + losses.mean_squared_error, losses.binary_crossentropy, + losses.categorical_crossentropy + } + for y, loss, shape in zip(targets, loss_fns, output_shapes): + if y is None or loss is None or tensor_util.is_tensor(y): + continue + if loss is losses.categorical_crossentropy: + if y.shape[-1] == 1: + raise ValueError('You are passing a target array of shape ' + str( + y.shape) + ' while using as loss `categorical_crossentropy`. ' + '`categorical_crossentropy` expects ' + 'targets to be binary matrices (1s and 0s) ' + 'of shape (samples, classes). 
' + 'If your targets are integer classes, ' + 'you can convert them to the expected format via:\n' + '```\n' + 'from keras.utils import to_categorical\n' + 'y_binary = to_categorical(y_int)\n' + '```\n' + '\n' + 'Alternatively, you can use the loss function ' + '`sparse_categorical_crossentropy` instead, ' + 'which does expect integer targets.') + if loss in key_losses: + for target_dim, out_dim in zip(y.shape[1:], shape[1:]): + if out_dim is not None and target_dim != out_dim: + raise ValueError('A target array with shape ' + str(y.shape) + + ' was passed for an output of shape ' + str(shape) + + ' while using as loss `' + loss.__name__ + '`. ' + 'This loss expects ' + 'targets to have the same shape ' + 'as the output.') + + +def collect_metrics(metrics, output_names): + """Maps metric functions to model outputs. + + Arguments: + metrics: a list or dict of metric functions. + output_names: a list of the names (strings) of model outputs. + + Returns: + A list (one entry per model output) of lists of metric functions. + For instance, if the model has 2 outputs, and for the first output + we want to compute "binary_accuracy" and "binary_crossentropy", + and just "binary_accuracy" for the second output, + the list would look like: + `[[binary_accuracy, binary_crossentropy], [binary_accuracy]]` + + Raises: + TypeError: if an incorrect type is passed for the `metrics` argument. + """ + if not metrics: + return [[] for _ in output_names] + if isinstance(metrics, list): + # we then apply all metrics to all outputs. + return [copy.copy(metrics) for _ in output_names] + elif isinstance(metrics, dict): + nested_metrics = [] + for name in output_names: + output_metrics = metrics.get(name, []) + if not isinstance(output_metrics, list): + output_metrics = [output_metrics] + nested_metrics.append(output_metrics) + return nested_metrics + else: + raise TypeError('Type of `metrics` argument not understood. 
' + 'Expected a list or dictionary, found: ' + str(metrics)) + + +def batch_shuffle(index_array, batch_size): + """Shuffles an array in a batch-wise fashion. + + Useful for shuffling HDF5 arrays + (where one cannot access arbitrary indices). + + Arguments: + index_array: array of indices to be shuffled. + batch_size: integer. + + Returns: + The `index_array` array, shuffled in a batch-wise fashion. + """ + batch_count = int(len(index_array) / batch_size) + # to reshape we need to be cleanly divisible by batch size + # we stash extra items and reappend them after shuffling + last_batch = index_array[batch_count * batch_size:] + index_array = index_array[:batch_count * batch_size] + index_array = index_array.reshape((batch_count, batch_size)) + np.random.shuffle(index_array) + index_array = index_array.flatten() + return np.append(index_array, last_batch) + + +def weighted_masked_objective(fn): + """Adds support for masking and sample-weighting to an objective function. + + It transforms an objective function `fn(y_true, y_pred)` + into a sample-weighted, cost-masked objective function + `fn(y_true, y_pred, weights, mask)`. + + Arguments: + fn: The objective function to wrap, + with signature `fn(y_true, y_pred)`. + + Returns: + A function with signature `fn(y_true, y_pred, weights, mask)`. + """ + if fn is None: + return None + + def weighted(y_true, y_pred, weights, mask=None): + """Wrapper function. + + Arguments: + y_true: `y_true` argument of `fn`. + y_pred: `y_pred` argument of `fn`. + weights: Weights tensor. + mask: Mask tensor. + + Returns: + Scalar tensor. + """ + # score_array has ndim >= 2 + score_array = fn(y_true, y_pred) + if mask is not None: + # Cast the mask to floatX to avoid float64 upcasting in theano + mask = K.cast(mask, K.floatx()) + # mask should have the same shape as score_array + score_array *= mask + # the loss per batch should be proportional + # to the number of unmasked samples. 
+ score_array /= K.mean(mask) + + # apply sample weighting + if weights is not None: + # reduce score_array to same ndim as weight array + ndim = K.ndim(score_array) + weight_ndim = K.ndim(weights) + score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim))) + score_array *= weights + score_array /= K.mean(K.cast(K.not_equal(weights, 0), K.floatx())) + return K.mean(score_array) + + return weighted + + +def standardize_weights(y, + sample_weight=None, + class_weight=None, + sample_weight_mode=None): + """Performs sample weight validation and standardization. + + Everything gets normalized to a single sample-wise (or timestep-wise) + weight array. + + Arguments: + y: Numpy array of model targets to be weighted. + sample_weight: User-provided `sample_weight` argument. + class_weight: User-provided `class_weight` argument. + sample_weight_mode: One of `None` or `"temporal"`. + `"temporal"` indicated that we expect 2D weight data + that will be applied to the last 2 dimensions of + the targets (i.e. we are weighting timesteps, not samples). + + Returns: + A numpy array of target weights, one entry per sample to weight. + + Raises: + ValueError: In case of invalid user-provided arguments. + """ + if sample_weight_mode is not None: + if sample_weight_mode != 'temporal': + raise ValueError('"sample_weight_mode ' + 'should be None or "temporal". ' + 'Found: ' + str(sample_weight_mode)) + if len(y.shape) < 3: + raise ValueError('Found a sample_weight array for ' + 'an input with shape ' + str(y.shape) + '. ' + 'Timestep-wise sample weighting (use of ' + 'sample_weight_mode="temporal") is restricted to ' + 'outputs that are at least 3D, i.e. that have ' + 'a time dimension.') + if sample_weight is not None and len(sample_weight.shape) != 2: + raise ValueError('Found a sample_weight array with shape ' + + str(sample_weight.shape) + '. 
' + 'In order to use timestep-wise sample weighting, ' + 'you should pass a 2D sample_weight array.') + else: + if sample_weight is not None and len(sample_weight.shape) != 1: + raise ValueError('Found a sample_weight array with shape ' + + str(sample_weight.shape) + '. ' + 'In order to use timestep-wise sample weights, ' + 'you should specify ' + 'sample_weight_mode="temporal" ' + 'in compile(). If you just mean to use ' + 'sample-wise weights, make sure your ' + 'sample_weight array is 1D.') + + if sample_weight is not None: + if len(sample_weight.shape) > len(y.shape): + raise ValueError( + 'Found a sample_weight with shape' + str(sample_weight.shape) + '.' + 'Expected sample_weight with rank ' + 'less than or equal to ' + str(len(y.shape))) + + if y.shape[:sample_weight.ndim] != sample_weight.shape: + raise ValueError( + 'Found a sample_weight array with shape ' + str(sample_weight.shape) + + ' for an input with shape ' + str(y.shape) + '. ' + 'sample_weight cannot be broadcast.') + return sample_weight + elif isinstance(class_weight, dict): + if len(y.shape) > 2: + raise ValueError('`class_weight` not supported for ' + '3+ dimensional targets.') + if y.shape[1] > 1: + y_classes = np.argmax(y, axis=1) + elif y.shape[1] == 1: + y_classes = np.reshape(y, y.shape[0]) + else: + y_classes = y + + weights = np.asarray( + [class_weight[cls] for cls in y_classes if cls in class_weight]) + + if len(weights) != len(y_classes): + # subtract the sets to pick all missing classes + existing_classes = set(y_classes) + existing_class_weight = set(class_weight.keys()) + raise ValueError('`class_weight` must contain all classes in the data.' + ' The classes %s exist in the data but not in ' + '`class_weight`.' 
% + (existing_classes - existing_class_weight)) + return weights + else: + return None diff --git a/tensorflow/python/keras/_impl/keras/utils/__init__.py b/tensorflow/python/keras/_impl/keras/utils/__init__.py index 370ae0dd0f..0c9f19a0c8 100644 --- a/tensorflow/python/keras/_impl/keras/utils/__init__.py +++ b/tensorflow/python/keras/_impl/keras/utils/__init__.py @@ -31,8 +31,8 @@ from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_ke from tensorflow.python.keras._impl.keras.utils.io_utils import HDF5Matrix from tensorflow.python.keras._impl.keras.utils.layer_utils import convert_all_kernels_in_model from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary +from tensorflow.python.keras._impl.keras.utils.multi_gpu_utils import multi_gpu_model from tensorflow.python.keras._impl.keras.utils.np_utils import normalize from tensorflow.python.keras._impl.keras.utils.np_utils import to_categorical -from tensorflow.python.keras._impl.keras.utils.training_utils import multi_gpu_model from tensorflow.python.keras._impl.keras.utils.vis_utils import plot_model diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils.py b/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils.py similarity index 100% rename from tensorflow/python/keras/_impl/keras/utils/training_utils.py rename to tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils.py diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils_test.py similarity index 100% rename from tensorflow/python/keras/_impl/keras/utils/training_utils_test.py rename to tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils_test.py diff --git a/tensorflow/python/keras/utils/__init__.py b/tensorflow/python/keras/utils/__init__.py index 91cc860727..2f74cf031d 100644 --- a/tensorflow/python/keras/utils/__init__.py +++ b/tensorflow/python/keras/utils/__init__.py @@ -30,9 +30,9 @@ from 
tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object from tensorflow.python.keras._impl.keras.utils.io_utils import HDF5Matrix from tensorflow.python.keras._impl.keras.utils.layer_utils import convert_all_kernels_in_model +from tensorflow.python.keras._impl.keras.utils.multi_gpu_utils import multi_gpu_model from tensorflow.python.keras._impl.keras.utils.np_utils import normalize from tensorflow.python.keras._impl.keras.utils.np_utils import to_categorical -from tensorflow.python.keras._impl.keras.utils.training_utils import multi_gpu_model from tensorflow.python.keras._impl.keras.utils.vis_utils import plot_model del absolute_import -- GitLab From 4d631ce22f2902ed11b5e56a6241983dfa5d3eed Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 1 Mar 2018 12:04:59 -0800 Subject: [PATCH 212/311] TFE: Cache `TensorShape` object for `EagerTensor`'s, for performance. PiperOrigin-RevId: 187512946 --- tensorflow/python/eager/pywrap_tensor.cc | 25 ++++++++++++++++++++++++ tensorflow/python/framework/ops.py | 6 +++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index d3aaede749..8338bc4343 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -186,6 +186,10 @@ typedef struct EagerTensor { // This stores `_keras_mask` object and is set by Tensorflow layers. PyObject* keras_mask; + // This stores `_tensor_shape`, a cached `TensorShape` object, and is set the + // first time that `_EagerTensorBase`'s `shape` property is called. + PyObject* tensor_shape; + // We store a status object here as an optimization to avoid allocating a new // Status objects on different functions that operate on EagerTensor and need // to use a TF_Status object. 
However note that accesses to `status` are not @@ -201,6 +205,8 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { self->handle_data = Py_None; Py_INCREF(Py_None); self->keras_mask = Py_None; + Py_INCREF(Py_None); + self->tensor_shape = Py_None; self->status = TF_NewStatus(); PyObject* value; PyObject* context = nullptr; @@ -333,6 +339,7 @@ void EagerTensor_dealloc(EagerTensor* self) { TF_DeleteStatus(self->status); Py_DECREF(self->handle_data); Py_DECREF(self->keras_mask); + Py_DECREF(self->tensor_shape); TFE_DeleteTensorHandle(self->handle); self->handle = nullptr; // We have the global interpreter lock, so use this chance to perform delayed @@ -420,6 +427,19 @@ static int EagerTensor_setkeras_mask(EagerTensor* self, PyObject* value, self->keras_mask = value; return 0; } + +static PyObject* EagerTensor_tensor_shape(EagerTensor* self, void* unused) { + Py_INCREF(self->tensor_shape); + return self->tensor_shape; +} + +static int EagerTensor_settensor_shape(EagerTensor* self, PyObject* value, + void* unused) { + Py_DECREF(self->tensor_shape); + Py_INCREF(value); + self->tensor_shape = value; + return 0; +} // Function `_copy_to_device`. 
static PyObject* EagerTensor_copy_to_device(EagerTensor* self, PyObject* args, PyObject* kwds) { @@ -484,6 +504,9 @@ static PyGetSetDef EagerTensor_getseters[] = { {const_cast("_keras_mask"), (getter)EagerTensor_keras_mask, (setter)EagerTensor_setkeras_mask, const_cast("_keras_mask"), nullptr}, + {const_cast("_tensor_shape"), (getter)EagerTensor_tensor_shape, + (setter)EagerTensor_settensor_shape, const_cast("_tensor_shape"), + nullptr}, {nullptr} /* Sentinel */ }; @@ -599,6 +622,8 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) { t->handle_data = Py_None; Py_INCREF(Py_None); t->keras_mask = Py_None; + Py_INCREF(Py_None); + t->tensor_shape = Py_None; t->handle = handle; t->status = TF_NewStatus(); } diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 735ba316d0..0a85b153de 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -782,7 +782,11 @@ class _EagerTensorBase(Tensor): @property def shape(self): - return tensor_shape.TensorShape(self._shape_tuple()) + if self._tensor_shape is None: # pylint: disable=access-member-before-definition + # `_tensor_shape` is declared and defined in the definition of + # `EagerTensor`, in C. + self._tensor_shape = tensor_shape.TensorShape(self._shape_tuple()) + return self._tensor_shape def get_shape(self): """Alias of Tensor.shape.""" -- GitLab From c953be2e880b3f751e014f947c2d054e4a22c3e2 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 1 Mar 2018 12:23:37 -0800 Subject: [PATCH 213/311] Remove underscore prefix from the following HIDDEN ops: add_sparse_to_tensors_map, add_many_sparse_to_tensors_map and take_many_sparse_from_tensors_map. 
PiperOrigin-RevId: 187515638 --- tensorflow/python/framework/python_op_gen.cc | 4 +--- tensorflow/python/ops/sparse_ops.py | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index 4813458f07..64d214a07f 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -100,10 +100,8 @@ bool IsOpWithUnderscorePrefix(const string& s) { "fused_batch_norm", "histogram_fixed_width", "stack", "batch_norm_with_global_normalization", // TODO(annarev): replace these ops in the next change. - "add_sparse_to_tensors_map", "add_many_sparse_to_tensors_map", "broadcast_gradient_args", "concat", "enter", "histogram_summary", - "ref_enter", "ref_identity", "scalar_summary", - "take_many_sparse_from_tensors_map"}); + "ref_enter", "ref_identity", "scalar_summary"}); return kUnderscoreOps->count(s) > 0; } diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index a01bba632f..c580052c32 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -2046,7 +2046,7 @@ def _add_sparse_to_tensors_map(sp_input, """ sp_input = _convert_to_sparse_tensor(sp_input) - return gen_sparse_ops._add_sparse_to_tensors_map( + return gen_sparse_ops.add_sparse_to_tensors_map( sp_input.indices, sp_input.values, sp_input.dense_shape, @@ -2086,7 +2086,7 @@ def _add_many_sparse_to_tensors_map(sp_input, """ sp_input = _convert_to_sparse_tensor(sp_input) - return gen_sparse_ops._add_many_sparse_to_tensors_map( + return gen_sparse_ops.add_many_sparse_to_tensors_map( sp_input.indices, sp_input.values, sp_input.dense_shape, @@ -2167,7 +2167,7 @@ def _take_many_sparse_from_tensors_map(sparse_map_op, with ops.colocate_with(sparse_map_op): shared_name = sparse_map_op.get_attr("shared_name") or sparse_map_op.name output_indices, output_values, output_shape = ( - 
gen_sparse_ops._take_many_sparse_from_tensors_map( + gen_sparse_ops.take_many_sparse_from_tensors_map( sparse_handles, dtype=sparse_map_op.get_attr("T"), container=sparse_map_op.get_attr("container"), -- GitLab From 1df40b152216bde47dd9ac1fa65bec57434920e1 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 1 Mar 2018 12:56:05 -0800 Subject: [PATCH 214/311] [XLA] Fully qualify xla::MakeUnique uses in shape_tree.h. No functional changes. PiperOrigin-RevId: 187520283 --- tensorflow/compiler/xla/shape_tree.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h index 280f02e886..ffaa40c2d6 100644 --- a/tensorflow/compiler/xla/shape_tree.h +++ b/tensorflow/compiler/xla/shape_tree.h @@ -53,7 +53,7 @@ struct ShapeTreeNode { ShapeTreeNode(const ShapeTreeNode& other) : data(other.data), children(other.children.size()) { for (size_t i = 0; i < children.size(); ++i) { - children[i] = MakeUnique(*other.children[i]); + children[i] = ::xla::MakeUnique(*other.children[i]); } } @@ -62,7 +62,7 @@ struct ShapeTreeNode { data = other.data; children.resize(other.children.size()); for (size_t i = 0; i < children.size(); ++i) { - children[i] = MakeUnique(*other.children[i]); + children[i] = ::xla::MakeUnique(*other.children[i]); } } return *this; @@ -445,7 +445,7 @@ class ShapeTreeIterator : public std::iterator(index, node_->data); + current_ = ::xla::MakeUnique(index, node_->data); return *current_; } @@ -492,7 +492,7 @@ void ShapeTree::InitChildren(const Shape& shape, Node* node) { template ShapeTree::ShapeTree(Shape shape) : root_(), - shape_storage_(MakeUnique(std::move(shape))), + shape_storage_(::xla::MakeUnique(std::move(shape))), shape_(shape_storage_.get()) { // The shape_ field is just used to hold the structure of the shape. // It should not be relied upon to store layout information. 
@@ -508,7 +508,7 @@ ShapeTree::ShapeTree(const Shape* shape) : root_(), shape_(shape) { template ShapeTree::ShapeTree(Shape shape, const T& init_value) : root_(init_value), - shape_storage_(MakeUnique(std::move(shape))), + shape_storage_(::xla::MakeUnique(std::move(shape))), shape_(shape_storage_.get()) { // The shape_ field is just used to hold the structure of the shape. // It should not be relied upon to store layout information. -- GitLab From deef58ba3913c4ab9ca93876cd30744db00c4a6a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 13:00:40 -0800 Subject: [PATCH 215/311] Cast sequence_length to an integer. PiperOrigin-RevId: 187520920 --- .../feature_column/sequence_feature_column.py | 2 +- .../sequence_feature_column_test.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index e99033bbec..e446043bdd 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -295,7 +295,7 @@ def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): row_ids = sp_tensor.indices[:, 0] column_ids = sp_tensor.indices[:, 1] column_ids += array_ops.ones_like(column_ids) - seq_length = ( + seq_length = math_ops.to_int64( math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) # If the last n rows do not have ids, seq_length will have shape # [batch_size - n]. Pad the remaining values with zeros. 
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 8c37ccf11b..105213680e 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -221,8 +221,9 @@ class SequenceCategoricalColumnWithIdentityTest(test.TestCase): sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) + sequence_length = sess.run(sequence_length) + self.assertAllEqual(expected_sequence_length, sequence_length) + self.assertEqual(np.int64, sequence_length.dtype) def test_sequence_length_with_zeros(self): column = sfc.sequence_categorical_column_with_identity( @@ -311,8 +312,9 @@ class SequenceEmbeddingColumnTest(test.TestCase): _LazyBuilder({'aaa': sparse_input})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) + sequence_length = sess.run(sequence_length) + self.assertAllEqual(expected_sequence_length, sequence_length) + self.assertEqual(np.int64, sequence_length.dtype) def test_sequence_length_with_empty_rows(self): """Tests _sequence_length when some examples do not have ids.""" @@ -423,8 +425,9 @@ class SequenceNumericColumnTest(test.TestCase): _LazyBuilder({'aaa': sparse_input})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) + sequence_length = sess.run(sequence_length) + self.assertAllEqual(expected_sequence_length, sequence_length) + self.assertEqual(np.int64, sequence_length.dtype) def test_sequence_length_with_shape(self): """Tests _sequence_length with shape !=(1,).""" -- GitLab 
From 16478853c73d9e6dfab26e73e99d931f4c74043c Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 1 Mar 2018 13:04:44 -0800 Subject: [PATCH 216/311] Fix parameter name mismatches in declarations/definitions. Reported by clang-tidy PiperOrigin-RevId: 187521627 --- .../xla/client/compile_only_client.cc | 13 +++++------ .../xla/client/computation_builder.cc | 23 ++++++++++--------- .../compiler/xla/client/computation_builder.h | 2 +- tensorflow/compiler/xla/client/local_client.h | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/xla/client/compile_only_client.cc b/tensorflow/compiler/xla/client/compile_only_client.cc index c7e2c4367b..59662c95ac 100644 --- a/tensorflow/compiler/xla/client/compile_only_client.cc +++ b/tensorflow/compiler/xla/client/compile_only_client.cc @@ -39,16 +39,15 @@ CompileOnlyClient::CompileAheadOfTime( return compiler_service_->CompileAheadOfTime(service_instances, options); } -int64 CompileOnlyClient::PointerSizeForTriple( - tensorflow::StringPiece target_triple) { - llvm::Triple triple(llvm::Triple::normalize( - llvm::StringRef(target_triple.data(), target_triple.size()))); - if (triple.isArch64Bit()) { +int64 CompileOnlyClient::PointerSizeForTriple(tensorflow::StringPiece triple) { + llvm::Triple llvm_triple( + llvm::Triple::normalize(llvm::StringRef(triple.data(), triple.size()))); + if (llvm_triple.isArch64Bit()) { return 8; - } else if (triple.isArch32Bit()) { + } else if (llvm_triple.isArch32Bit()) { return 4; } else { - CHECK(triple.isArch16Bit()); + CHECK(llvm_triple.isArch16Bit()); return 2; } } diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 2a6e02649d..4afef6e448 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -408,7 +408,7 @@ ComputationDataHandle ComputationBuilder::Reshape( ComputationDataHandle ComputationBuilder::Collapse( 
const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice dims_to_collapse) { + tensorflow::gtl::ArraySlice dimensions) { if (!first_error_.ok()) { return ComputationDataHandle(); } @@ -416,8 +416,8 @@ ComputationDataHandle ComputationBuilder::Collapse( // Don't support out-of-order collapse here. // Checks that the collapsed dimensions are in order and consecutive. for (tensorflow::gtl::ArraySlice::size_type i = 1; - i < dims_to_collapse.size(); ++i) { - if (dims_to_collapse[i] - 1 != dims_to_collapse[i - 1]) { + i < dimensions.size(); ++i) { + if (dimensions[i] - 1 != dimensions[i - 1]) { NoteError(InvalidArgument( "Collapsed dimensions are not in order and consecutive.")); return ComputationDataHandle(); @@ -434,9 +434,9 @@ ComputationDataHandle ComputationBuilder::Collapse( VLOG(3) << "original shape: " << ShapeUtil::HumanString(*original_shape); VLOG(3) << "dims to collapse: " - << tensorflow::str_util::Join(dims_to_collapse, ","); + << tensorflow::str_util::Join(dimensions, ","); - if (dims_to_collapse.size() <= 1) { + if (dimensions.size() <= 1) { // Not collapsing anything, trivially we can return the operand versus // enqueueing a trivial reshape. 
return operand; @@ -444,7 +444,7 @@ ComputationDataHandle ComputationBuilder::Collapse( std::vector new_sizes; for (int i = 0; i < ShapeUtil::Rank(*original_shape); ++i) { - if (i <= dims_to_collapse.front() || i > dims_to_collapse.back()) { + if (i <= dimensions.front() || i > dimensions.back()) { new_sizes.push_back(original_shape->dimensions(i)); } else { new_sizes.back() *= original_shape->dimensions(i); @@ -753,13 +753,13 @@ ComputationDataHandle ComputationBuilder::Infeed(const Shape& shape, } void ComputationBuilder::Outfeed(const ComputationDataHandle& operand, - const Shape& shape, + const Shape& shape_with_layout, const string& outfeed_config) { OpRequest op_request; OutfeedRequest* request = op_request.mutable_outfeed_request(); request->set_outfeed_config(outfeed_config); *request->mutable_operand() = operand; - *request->mutable_shape() = shape; + *request->mutable_shape() = shape_with_layout; RunOpAndNoteError(&op_request); } @@ -1382,15 +1382,16 @@ ComputationDataHandle ComputationBuilder::BatchNormInference( ComputationDataHandle ComputationBuilder::BatchNormGrad( const ComputationDataHandle& operand, const ComputationDataHandle& scale, - const ComputationDataHandle& mean, const ComputationDataHandle& var, + const ComputationDataHandle& batch_mean, + const ComputationDataHandle& batch_var, const ComputationDataHandle& grad_output, float epsilon, int64 feature_index) { OpRequest op_request; BatchNormGradRequest* request = op_request.mutable_batch_norm_grad_request(); *request->mutable_operand() = operand; *request->mutable_scale() = scale; - *request->mutable_mean() = mean; - *request->mutable_variance() = var; + *request->mutable_mean() = batch_mean; + *request->mutable_variance() = batch_var; *request->mutable_grad_output() = grad_output; request->set_epsilon(epsilon); request->set_feature_index(feature_index); diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 
377b671639..e085fcb3b1 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -872,7 +872,7 @@ class ComputationBuilder { Window* window); // Internal helper method that does the building for an arbitrary unary op. - ComputationDataHandle UnaryOp(UnaryOperation binop, + ComputationDataHandle UnaryOp(UnaryOperation unop, const ComputationDataHandle& operand); // Internal helper method that does the building for an arbitrary binary op. diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index b52a30f5a0..de0ed13c43 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -69,7 +69,7 @@ class LocalExecutable { // of the computation. tensorflow::Status ValidateExecutionOptions( const tensorflow::gtl::ArraySlice arguments, - const ExecutableRunOptions& options, const Backend& backend); + const ExecutableRunOptions& run_options, const Backend& backend); // Records the computation in a SessionModule proto with the arguments used to // invoke it, and the result. Enabled by flag: --tla_dump_executions_to. -- GitLab From 8307faacb96808eae1550ed879fa9a85cf76d897 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 13:09:46 -0800 Subject: [PATCH 217/311] Add support for keyword args for dynamically converted functions. 
PiperOrigin-RevId: 187522324 --- tensorflow/contrib/py2tf/converters/call_trees.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/py2tf/converters/call_trees.py b/tensorflow/contrib/py2tf/converters/call_trees.py index f18f9f6086..ca8726f916 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees.py +++ b/tensorflow/contrib/py2tf/converters/call_trees.py @@ -185,7 +185,7 @@ class CallTreeTransformer(transformer.Base): """ return templates.replace(template, func=node.func, original_args=node.args) - def _converted_call(self, node): + def _insert_dynamic_conversion(self, node): """Inlines a dynamic conversion for a dynamic function.""" # TODO(mdan): Pass information on the statically compiled functions. # Having access to the statically compiled functions can help avoid @@ -208,7 +208,10 @@ class CallTreeTransformer(transformer.Base): """ call_expr = templates.replace( template, func=node.func, original_args=node.args) - return call_expr[0].value + new_call = call_expr[0].value + # TODO(mdan): Improve the template mechanism to better support this. + new_call.keywords = node.keywords + return new_call # pylint:disable=invalid-name @@ -251,7 +254,7 @@ class CallTreeTransformer(transformer.Base): raise NotImplementedError('py_func with return values') else: if self.context.recursive: - node = self._converted_call(node) + node = self._insert_dynamic_conversion(node) else: # Unresolved functions are allowed in non-recursive mode. 
pass -- GitLab From 0abc4c9ecae912676f6070ca4b76b35c80351c26 Mon Sep 17 00:00:00 2001 From: Fred Reiss Date: Thu, 1 Mar 2018 13:25:21 -0800 Subject: [PATCH 218/311] Clean up output formatting of saved_model_cli.py (#17235) --- .../docs_src/programmers_guide/saved_model.md | 60 ++++---- tensorflow/python/tools/saved_model_cli.py | 68 +++++---- .../python/tools/saved_model_cli_test.py | 141 +++++++++--------- 3 files changed, 142 insertions(+), 127 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index f18d50b282..c54c278584 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -697,15 +697,15 @@ executing the computation graph later. For example: $ saved_model_cli show --dir \ /tmp/saved_model_dir --tag_set serve --signature_def serving_default The given SavedModel SignatureDef contains the following input(s): -inputs['x'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x:0 + inputs['x'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x:0 The given SavedModel SignatureDef contains the following output(s): -outputs['y'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y:0 + outputs['y'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 Method name is: tensorflow/serving/predict ``` @@ -717,32 +717,32 @@ $ saved_model_cli show --dir /tmp/saved_model_dir --all MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs: signature_def['classify_x2_to_y3']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x2:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['scores'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y3:0 -Method name is: tensorflow/serving/classify + The given SavedModel SignatureDef contains the following input(s): + 
inputs['inputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x2:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['scores'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y3:0 + Method name is: tensorflow/serving/classify ... signature_def['serving_default']: -The given SavedModel SignatureDef contains the following input(s): -inputs['x'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['y'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y:0 -Method name is: tensorflow/serving/predict + The given SavedModel SignatureDef contains the following input(s): + inputs['x'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['y'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 + Method name is: tensorflow/serving/predict ``` diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index 33f6debbcb..b0e9e3e5ed 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -115,7 +115,7 @@ def _get_outputs_tensor_info_from_meta_graph_def(meta_graph_def, signature_def_key).outputs -def _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key): +def _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key, indent=0): """Prints input and output TensorInfos. Prints the details of input and output TensorInfos for the SignatureDef mapped @@ -126,6 +126,7 @@ def _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key): tag_set: Group of tag(s) of the MetaGraphDef, in string format, separated by ','. For tag-set contains multiple tags, all tags must be passed in. signature_def_key: A SignatureDef key string. + indent: How far (in increments of 2 spaces) to indent each line of output. 
""" meta_graph_def = saved_model_utils.get_meta_graph_def(saved_model_dir, tag_set) @@ -134,29 +135,39 @@ def _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key): outputs_tensor_info = _get_outputs_tensor_info_from_meta_graph_def( meta_graph_def, signature_def_key) - print('The given SavedModel SignatureDef contains the following input(s):') + indent_str = " " * indent + def in_print(s): + print(indent_str + s) + + in_print('The given SavedModel SignatureDef contains the following input(s):') for input_key, input_tensor in sorted(inputs_tensor_info.items()): - print('inputs[\'%s\'] tensor_info:' % input_key) - _print_tensor_info(input_tensor) + in_print(' inputs[\'%s\'] tensor_info:' % input_key) + _print_tensor_info(input_tensor, indent+1) - print('The given SavedModel SignatureDef contains the following output(s):') + in_print('The given SavedModel SignatureDef contains the following ' + 'output(s):') for output_key, output_tensor in sorted(outputs_tensor_info.items()): - print('outputs[\'%s\'] tensor_info:' % output_key) - _print_tensor_info(output_tensor) + in_print(' outputs[\'%s\'] tensor_info:' % output_key) + _print_tensor_info(output_tensor, indent+1) - print('Method name is: %s' % - meta_graph_def.signature_def[signature_def_key].method_name) + in_print('Method name is: %s' % + meta_graph_def.signature_def[signature_def_key].method_name) -def _print_tensor_info(tensor_info): +def _print_tensor_info(tensor_info, indent=0): """Prints details of the given tensor_info. Args: tensor_info: TensorInfo object to be printed. + indent: How far (in increments of 2 spaces) to indent each line output """ - print(' dtype: ' + - {value: key - for (key, value) in types_pb2.DataType.items()}[tensor_info.dtype]) + indent_str = " " * indent + def in_print(s): + print(indent_str + s) + + in_print(' dtype: ' + + {value: key + for (key, value) in types_pb2.DataType.items()}[tensor_info.dtype]) # Display shape as tuple. 
if tensor_info.tensor_shape.unknown_rank: shape = 'unknown_rank' @@ -164,8 +175,8 @@ def _print_tensor_info(tensor_info): dims = [str(dim.size) for dim in tensor_info.tensor_shape.dim] shape = ', '.join(dims) shape = '(' + shape + ')' - print(' shape: ' + shape) - print(' name: ' + tensor_info.name) + in_print(' shape: ' + shape) + in_print(' name: ' + tensor_info.name) def _show_all(saved_model_dir): @@ -186,7 +197,8 @@ def _show_all(saved_model_dir): signature_def_map = get_signature_def_map(saved_model_dir, tag_set) for signature_def_key in sorted(signature_def_map.keys()): print('\nsignature_def[\'' + signature_def_key + '\']:') - _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key) + _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key, + indent=1) def get_meta_graph_def(saved_model_dir, tag_set): @@ -614,19 +626,19 @@ def create_parser(): show_msg = ( 'Usage examples:\n' 'To show all tag-sets in a SavedModel:\n' - '$saved_model_cli show --dir /tmp/saved_model\n' + '$saved_model_cli show --dir /tmp/saved_model\n\n' 'To show all available SignatureDef keys in a ' 'MetaGraphDef specified by its tag-set:\n' - '$saved_model_cli show --dir /tmp/saved_model --tag_set serve\n' + '$saved_model_cli show --dir /tmp/saved_model --tag_set serve\n\n' 'For a MetaGraphDef with multiple tags in the tag-set, all tags must be ' 'passed in, separated by \';\':\n' '$saved_model_cli show --dir /tmp/saved_model --tag_set serve,gpu\n\n' 'To show all inputs and outputs TensorInfo for a specific' ' SignatureDef specified by the SignatureDef key in a' ' MetaGraph.\n' - '$saved_model_cli show --dir /tmp/saved_model --tag_set serve ' - '--signature_def serving_default\n\n' - 'To show all available information in the SavedModel\n:' + '$saved_model_cli show --dir /tmp/saved_model --tag_set serve' + ' --signature_def serving_default\n\n' + 'To show all available information in the SavedModel:\n' '$saved_model_cli show --dir /tmp/saved_model --all') parser_show = 
subparsers.add_parser( 'show', @@ -658,12 +670,14 @@ def create_parser(): run_msg = ('Usage example:\n' 'To run input tensors from files through a MetaGraphDef and save' ' the output tensors to files:\n' - '$saved_model_cli show --dir /tmp/saved_model --tag_set serve ' - '--signature_def serving_default ' - '--inputs input1_key=/tmp/124.npz[x],input2_key=/tmp/123.npy ' - '--input_exprs \'input3_key=np.ones(2)\' --input_examples ' - '\'input4_key=[{"id":[26],"weights":[0.5, 0.5]}]\' ' - '--outdir=/out\n\n' + '$saved_model_cli show --dir /tmp/saved_model --tag_set serve \\\n' + ' --signature_def serving_default \\\n' + ' --inputs input1_key=/tmp/124.npz[x],input2_key=/tmp/123.npy ' + '\\\n' + ' --input_exprs \'input3_key=np.ones(2)\' \\\n' + ' --input_examples ' + '\'input4_key=[{"id":[26],"weights":[0.5, 0.5]}]\' \\\n' + ' --outdir=/out\n\n' 'For more information about input file format, please see:\n' 'https://www.tensorflow.org/programmers_guide/saved_model_cli\n') parser_run = subparsers.add_parser( diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py index d6cbc49ba1..f99c844845 100644 --- a/tensorflow/python/tools/saved_model_cli_test.py +++ b/tensorflow/python/tools/saved_model_cli_test.py @@ -61,83 +61,84 @@ class SavedModelCLITestCase(test.TestCase): exp_out = """MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs: signature_def['classify_x2_to_y3']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x2:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['scores'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y3:0 -Method name is: tensorflow/serving/classify + The given SavedModel SignatureDef contains the following input(s): + inputs['inputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x2:0 + The given SavedModel SignatureDef contains the 
following output(s): + outputs['scores'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y3:0 + Method name is: tensorflow/serving/classify signature_def['classify_x_to_y']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_STRING - shape: unknown_rank - name: tf_example:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['scores'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y:0 -Method name is: tensorflow/serving/classify + The given SavedModel SignatureDef contains the following input(s): + inputs['inputs'] tensor_info: + dtype: DT_STRING + shape: unknown_rank + name: tf_example:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['scores'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 + Method name is: tensorflow/serving/classify signature_def['regress_x2_to_y3']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x2:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['outputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y3:0 -Method name is: tensorflow/serving/regress + The given SavedModel SignatureDef contains the following input(s): + inputs['inputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x2:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['outputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y3:0 + Method name is: tensorflow/serving/regress signature_def['regress_x_to_y']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_STRING - shape: unknown_rank - name: tf_example:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['outputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y:0 -Method name is: 
tensorflow/serving/regress + The given SavedModel SignatureDef contains the following input(s): + inputs['inputs'] tensor_info: + dtype: DT_STRING + shape: unknown_rank + name: tf_example:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['outputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 + Method name is: tensorflow/serving/regress signature_def['regress_x_to_y2']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_STRING - shape: unknown_rank - name: tf_example:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['outputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y2:0 -Method name is: tensorflow/serving/regress + The given SavedModel SignatureDef contains the following input(s): + inputs['inputs'] tensor_info: + dtype: DT_STRING + shape: unknown_rank + name: tf_example:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['outputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y2:0 + Method name is: tensorflow/serving/regress signature_def['serving_default']: -The given SavedModel SignatureDef contains the following input(s): -inputs['x'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['y'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y:0 -Method name is: tensorflow/serving/predict""" + The given SavedModel SignatureDef contains the following input(s): + inputs['x'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['y'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 + Method name is: tensorflow/serving/predict""" # pylint: enable=line-too-long + self.maxDiff = None # Produce a useful error msg if the comparison fails self.assertMultiLineEqual(output, exp_out) 
self.assertEqual(err.getvalue().strip(), '') @@ -193,11 +194,11 @@ Method name is: tensorflow/serving/predict""" output = out.getvalue().strip() expected_output = ( 'The given SavedModel SignatureDef contains the following input(s):\n' - 'inputs[\'x\'] tensor_info:\n' - ' dtype: DT_FLOAT\n shape: (-1, 1)\n name: x:0\n' + ' inputs[\'x\'] tensor_info:\n' + ' dtype: DT_FLOAT\n shape: (-1, 1)\n name: x:0\n' 'The given SavedModel SignatureDef contains the following output(s):\n' - 'outputs[\'y\'] tensor_info:\n' - ' dtype: DT_FLOAT\n shape: (-1, 1)\n name: y:0\n' + ' outputs[\'y\'] tensor_info:\n' + ' dtype: DT_FLOAT\n shape: (-1, 1)\n name: y:0\n' 'Method name is: tensorflow/serving/predict') self.assertEqual(output, expected_output) self.assertEqual(err.getvalue().strip(), '') -- GitLab From eec6cbd4a60c8525d6601ceebf50511cefa50ec1 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Thu, 1 Mar 2018 13:37:16 -0800 Subject: [PATCH 219/311] Fix TensorRT build. PiperOrigin-RevId: 187526192 --- tensorflow/contrib/tensorrt/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 65a0e903a7..3b7b68f61b 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -107,6 +107,7 @@ tf_cuda_library( tf_gen_op_wrapper_py( name = "trt_engine_op", + gen_locally = True, deps = [ ":trt_engine_op_op_lib", ":trt_logging", -- GitLab From 80710d5c53a8b2896a57dbe026d7f742e71fc03b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 13:43:03 -0800 Subject: [PATCH 220/311] Optimize training with feature selection by avoiding any computations on the features that are not selected once we have reached our target number of features. 
PiperOrigin-RevId: 187526964 --- .../boosted_trees/kernels/model_ops.cc | 57 +++ .../boosted_trees/kernels/training_ops.cc | 28 +- .../contrib/boosted_trees/ops/model_ops.cc | 27 ++ .../python/kernel_tests/model_ops_test.py | 16 + .../python/kernel_tests/training_ops_test.py | 190 +-------- .../boosted_trees/python/ops/model_ops.py | 1 + .../python/training/functions/gbdt_batch.py | 34 +- .../training/functions/gbdt_batch_test.py | 376 ++++++++++++++++++ 8 files changed, 517 insertions(+), 212 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/model_ops.cc b/tensorflow/contrib/boosted_trees/kernels/model_ops.cc index 754b7bc327..3bf33186ec 100644 --- a/tensorflow/contrib/boosted_trees/kernels/model_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/model_ops.cc @@ -137,6 +137,61 @@ class TreeEnsembleDeserializeOp : public OpKernel { } }; +class TreeEnsembleUsedHandlersOp : public OpKernel { + public: + explicit TreeEnsembleUsedHandlersOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, + context->GetAttr("num_all_handlers", &num_handlers_)); + } + + void Compute(OpKernelContext* context) override { + boosted_trees::models::DecisionTreeEnsembleResource* ensemble_resource; + + OP_REQUIRES_OK(context, LookupResource(context, HandleFromInput(context, 0), + &ensemble_resource)); + tf_shared_lock l(*ensemble_resource->get_mutex()); + core::ScopedUnref unref_me(ensemble_resource); + + // Get the stamp token. + const Tensor* stamp_token_t; + OP_REQUIRES_OK(context, context->input("stamp_token", &stamp_token_t)); + int64 stamp_token = stamp_token_t->scalar()(); + + // Only the Chief should run this Op and it is guaranteed to be in + // a consistent state so the stamps must always match. 
+ CHECK(ensemble_resource->is_stamp_valid(stamp_token)); + + Tensor* output_used_handlers_t = nullptr; + OP_REQUIRES_OK( + context, context->allocate_output("used_handlers_mask", {num_handlers_}, + &output_used_handlers_t)); + auto output_used_handlers = output_used_handlers_t->vec(); + + Tensor* output_num_used_handlers_t = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output("num_used_handlers", {}, + &output_num_used_handlers_t)); + int handler_idx = 0; + std::vector used_handlers = ensemble_resource->GetUsedHandlers(); + output_num_used_handlers_t->scalar()() = used_handlers.size(); + for (int64 i = 0; i < num_handlers_; ++i) { + if (handler_idx >= used_handlers.size() || + used_handlers[handler_idx] > i) { + output_used_handlers(i) = false; + } else { + OP_REQUIRES(context, used_handlers[handler_idx] == i, + errors::InvalidArgument("Handler IDs should be sorted.")); + ++handler_idx; + output_used_handlers(i) = true; + } + } + } + + private: + int64 num_handlers_; +}; + REGISTER_RESOURCE_HANDLE_KERNEL(DecisionTreeEnsembleResource); REGISTER_KERNEL_BUILDER( @@ -155,5 +210,7 @@ REGISTER_KERNEL_BUILDER(Name("TreeEnsembleSerialize").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name("TreeEnsembleDeserialize").Device(DEVICE_CPU), TreeEnsembleDeserializeOp); +REGISTER_KERNEL_BUILDER(Name("TreeEnsembleUsedHandlers").Device(DEVICE_CPU), + TreeEnsembleUsedHandlersOp); } // namespace boosted_trees } // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc index 7f8dea1d3c..1bfeed3066 100644 --- a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc @@ -361,27 +361,10 @@ class GrowTreeEnsembleOp : public OpKernel { // Increment attempt stats. 
ensemble_resource->IncrementAttempts(); - // In case we want to do feature selection and we have reached the limit, - // build a list of handlers used so far to avoid adding new features. - std::vector allowed_handlers; - if (learner_config_.constraints().max_number_of_unique_feature_columns() > - 0) { - allowed_handlers = ensemble_resource->GetUsedHandlers(); - // TODO(soroush): We can disable handlers that are not going to be used to - // avoid unnecessary computations. - if (allowed_handlers.size() < - learner_config_.constraints() - .max_number_of_unique_feature_columns()) { - // We have not reached the limit yet. Empty the list of allow features - // which means we can keep adding new features. - allowed_handlers.clear(); - } - } - // Find best splits for each active partition. std::map best_splits; - FindBestSplitsPerPartition(context, allowed_handlers, partition_ids_list, - gains_list, splits_list, &best_splits); + FindBestSplitsPerPartition(context, partition_ids_list, gains_list, + splits_list, &best_splits); // No-op if no new splits can be considered. if (best_splits.empty()) { @@ -422,19 +405,12 @@ class GrowTreeEnsembleOp : public OpKernel { // and finds the best split for each partition. void FindBestSplitsPerPartition( OpKernelContext* const context, - const std::vector& allowed_handlers, // Empty means all handlers. const OpInputList& partition_ids_list, const OpInputList& gains_list, const OpInputList& splits_list, std::map* best_splits) { // Find best split per partition going through every feature candidate. // TODO(salehay): Is this worth parallelizing? 
for (int64 handler_id = 0; handler_id < num_handlers_; ++handler_id) { - if (!allowed_handlers.empty()) { - if (!std::binary_search(allowed_handlers.begin(), - allowed_handlers.end(), handler_id)) { - continue; - } - } const auto& partition_ids = partition_ids_list[handler_id].vec(); const auto& gains = gains_list[handler_id].vec(); const auto& splits = splits_list[handler_id].vec(); diff --git a/tensorflow/contrib/boosted_trees/ops/model_ops.cc b/tensorflow/contrib/boosted_trees/ops/model_ops.cc index 0786c41664..9d6343c7e8 100644 --- a/tensorflow/contrib/boosted_trees/ops/model_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/model_ops.cc @@ -110,5 +110,32 @@ stamp_token: Token to use as the new value of the resource stamp. tree_ensemble_config: Serialized proto of the ensemble. )doc"); +REGISTER_OP("TreeEnsembleUsedHandlers") + .Attr("num_all_handlers: int >= 0") + .Input("tree_ensemble_handle: resource") + .Input("stamp_token: int64") + .Output("num_used_handlers: int64") + .Output("used_handlers_mask: bool") + .SetShapeFn([](shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle unused_input; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused_input)); + c->set_output(0, c->Scalar()); + int num_all_handlers; + c->GetAttr("num_all_handlers", &num_all_handlers).IgnoreError(); + c->set_output(1, {c->Vector(num_all_handlers)}); + + return Status::OK(); + }) + .Doc(R"doc( +Returns the mask of used handlers along with the number of non-zero elements in +this mask. Used in feature selection. + +tree_ensemble_handle: Handle to the tree ensemble. +stamp_token: Token to use as the new value of the resource stamp. +num_used_handlers: number of feature column handlers used in the model. +used_handlers_mask: A boolean vector of showing which handlers are used in the + model. 
+)doc"); + } // namespace boosted_trees } // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/model_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/model_ops_test.py index 27c288bbf7..63b9c5fddf 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/model_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/model_ops_test.py @@ -310,6 +310,22 @@ class ModelOpsTest(test_util.TensorFlowTestCase): # The third tree was added after the save. self.assertAllClose(result.eval(), [[-1.1], [-1.1]]) + def testUsedHandlers(self): + with self.test_session(): + tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() + tree_ensemble_config.growing_metadata.used_handler_ids.append(1) + tree_ensemble_config.growing_metadata.used_handler_ids.append(5) + stamp_token = 3 + tree_ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=stamp_token, + tree_ensemble_config=tree_ensemble_config.SerializeToString(), + name="create_tree") + resources.initialize_resources(resources.shared_resources()).run() + result = model_ops.tree_ensemble_used_handlers( + tree_ensemble_handle, stamp_token, num_all_handlers=6) + self.assertAllEqual([0, 1, 0, 0, 0, 1], result.used_handlers_mask.eval()) + self.assertEqual(2, result.num_used_handlers.eval()) + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py index 8ca1aabaca..3e524efbea 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py @@ -1588,7 +1588,7 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): self.assertEqual( 2, tree_ensemble_config.tree_metadata[2].num_tree_weight_updates) - def testGrowExistingEnsembleTreeWithFeatureSelectionCanStillGrow(self): + def 
testGrowExistingEnsembleTreeWithFeatureSelectionUsedHandlers(self): """Test growing a tree with feature selection.""" with self.test_session() as session: # Create existing ensemble with one root split and one bias tree. @@ -1649,7 +1649,6 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): num_trees_attempted: 2 num_layers_attempted: 2 used_handler_ids: 2 - used_handler_ids: 5 } """, tree_ensemble_config) tree_ensemble_handle = model_ops.tree_ensemble_variable( @@ -1668,183 +1667,8 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): min_node_weight=0, pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE, growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE) - # There are 2 handler_ids in used_handler_ids already but one of them - # is handler 2, so we can still grow trees. - learner_config.constraints.max_number_of_unique_feature_columns = 2 - learner_config = learner_config.SerializeToString() - # Prepare handler inputs. - handler1_partitions = np.array([0], dtype=np.int32) - handler1_gains = np.array([7.62], dtype=np.float32) - handler1_split = [_gen_dense_split_info(5, 0.52, -4.375, 7.143)] - handler2_partitions = np.array([0], dtype=np.int32) - handler2_gains = np.array([0.63], dtype=np.float32) - handler2_split = [_gen_dense_split_info(2, 0.23, -0.6, 0.24)] - handler3_partitions = np.array([0], dtype=np.int32) - handler3_gains = np.array([7.62], dtype=np.float32) - handler3_split = [_gen_categorical_split_info(8, 7, -4.375, 7.143)] - - # Grow tree ensemble. - grow_op = training_ops.grow_tree_ensemble( - tree_ensemble_handle, - stamp_token=0, - next_stamp_token=1, - learning_rate=1, - partition_ids=[ - handler1_partitions, handler2_partitions, handler3_partitions - ], - gains=[handler1_gains, handler2_gains, handler3_gains], - splits=[handler1_split, handler2_split, handler3_split], - learner_config=learner_config, - dropout_seed=123, - center_bias=True) - session.run(grow_op) - - # Expect a new tree to be added with the split from handler 1. 
- _, serialized = session.run( - model_ops.tree_ensemble_serialize(tree_ensemble_handle)) - tree_ensemble_config.ParseFromString(serialized) - self.assertEqual(3, len(tree_ensemble_config.trees)) - self.assertEqual( - 2, len(tree_ensemble_config.growing_metadata.used_handler_ids)) - - def testGrowExistingEnsembleTreeWithFeatureSelectionEmptyEnsemble(self): - """Test growing a tree with feature selection with empty ensemble.""" - with self.test_session() as session: - # Create existing ensemble with one root split and one bias tree. - tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() - tree_ensemble_handle = model_ops.tree_ensemble_variable( - stamp_token=0, - tree_ensemble_config=tree_ensemble_config.SerializeToString(), - name="tree_ensemble") - resources.initialize_resources(resources.shared_resources()).run() - - # Prepare learner config. - learner_config = _gen_learner_config( - num_classes=2, - l1_reg=0, - l2_reg=0, - tree_complexity=0, - max_depth=1, - min_node_weight=0, - pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE, - growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE) - learner_config.constraints.max_number_of_unique_feature_columns = 2 - learner_config = learner_config.SerializeToString() - # Prepare handler inputs. - handler1_partitions = np.array([0], dtype=np.int32) - handler1_gains = np.array([7.62], dtype=np.float32) - handler1_split = [_gen_dense_split_info(5, 0.52, -4.375, 7.143)] - handler2_partitions = np.array([0], dtype=np.int32) - handler2_gains = np.array([0.63], dtype=np.float32) - handler2_split = [_gen_dense_split_info(2, 0.23, -0.6, 0.24)] - handler3_partitions = np.array([0], dtype=np.int32) - handler3_gains = np.array([7.62], dtype=np.float32) - handler3_split = [_gen_categorical_split_info(8, 7, -4.375, 7.143)] - - # Grow tree ensemble. 
- grow_op = training_ops.grow_tree_ensemble( - tree_ensemble_handle, - stamp_token=0, - next_stamp_token=1, - learning_rate=1, - partition_ids=[ - handler1_partitions, handler2_partitions, handler3_partitions - ], - gains=[handler1_gains, handler2_gains, handler3_gains], - splits=[handler1_split, handler2_split, handler3_split], - learner_config=learner_config, - dropout_seed=123, - center_bias=True) - session.run(grow_op) - - _, serialized = session.run( - model_ops.tree_ensemble_serialize(tree_ensemble_handle)) - tree_ensemble_config.ParseFromString(serialized) - self.assertEqual(1, len(tree_ensemble_config.trees)) - self.assertEqual( - 1, len(tree_ensemble_config.growing_metadata.used_handler_ids)) - - def testGrowExistingEnsembleTreeWithFeatureSelectionCantGrow(self): - """Test growing a tree with feature selection with empty ensemble.""" - with self.test_session() as session: - # Create existing ensemble with one root split and one bias tree. - tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() - text_format.Merge(""" - trees { - nodes { - leaf { - vector { - value: -0.32 - value: 0.28 - } - } - } - } - trees { - nodes { - categorical_id_binary_split { - feature_column: 3 - feature_id: 7 - left_id: 1 - right_id: 2 - } - node_metadata { - gain: 1.3 - } - } - nodes { - leaf { - sparse_vector { - index: 0 - value: 2.3 - } - } - } - nodes { - leaf { - sparse_vector { - index: 0 - value: -0.9 - } - } - } - } - tree_weights: 0.7 - tree_weights: 1 - tree_metadata { - num_tree_weight_updates: 1 - num_layers_grown: 1 - is_finalized: true - } - tree_metadata { - num_tree_weight_updates: 5 - num_layers_grown: 1 - is_finalized: true - } - growing_metadata { - num_trees_attempted: 2 - num_layers_attempted: 2 - used_handler_ids: 4 - used_handler_ids: 5 - } - """, tree_ensemble_config) - tree_ensemble_handle = model_ops.tree_ensemble_variable( - stamp_token=0, - tree_ensemble_config=tree_ensemble_config.SerializeToString(), - name="tree_ensemble") - 
resources.initialize_resources(resources.shared_resources()).run() - # Prepare learner config. - learner_config = _gen_learner_config( - num_classes=2, - l1_reg=0, - l2_reg=0, - tree_complexity=0, - max_depth=1, - min_node_weight=0, - pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE, - growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE) - learner_config.constraints.max_number_of_unique_feature_columns = 2 + learner_config.constraints.max_number_of_unique_feature_columns = 3 learner_config = learner_config.SerializeToString() # Prepare handler inputs. handler1_partitions = np.array([0], dtype=np.int32) @@ -1876,12 +1700,10 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): _, serialized = session.run( model_ops.tree_ensemble_serialize(tree_ensemble_handle)) tree_ensemble_config.ParseFromString(serialized) - # We can't grow a tree since we have reached the limit of 2 unique - # features [4, 5] and the only available splits are from - # handlers [0, 1, 2]. - self.assertEqual(2, len(tree_ensemble_config.trees)) - self.assertEqual( - 2, len(tree_ensemble_config.growing_metadata.used_handler_ids)) + self.assertEqual(3, len(tree_ensemble_config.trees)) + # 2 was already used. handler 0 is being added in this tree. 
+ self.assertAllEqual( + [0, 2], tree_ensemble_config.growing_metadata.used_handler_ids) if __name__ == "__main__": diff --git a/tensorflow/contrib/boosted_trees/python/ops/model_ops.py b/tensorflow/contrib/boosted_trees/python/ops/model_ops.py index 7a5f509047..25b2c9e2fd 100644 --- a/tensorflow/contrib/boosted_trees/python/ops/model_ops.py +++ b/tensorflow/contrib/boosted_trees/python/ops/model_ops.py @@ -25,6 +25,7 @@ from tensorflow.contrib.boosted_trees.python.ops.gen_model_ops import tree_ensem from tensorflow.contrib.boosted_trees.python.ops.gen_model_ops import tree_ensemble_serialize # pylint: disable=unused-import from tensorflow.contrib.boosted_trees.python.ops.gen_model_ops import tree_ensemble_stamp_token +from tensorflow.contrib.boosted_trees.python.ops.gen_model_ops import tree_ensemble_used_handlers # pylint: enable=unused-import from tensorflow.python.framework import ops diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index f0b66dcbbe..233e21f1cf 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -57,6 +57,8 @@ PREDICTIONS = "predictions" PARTITION_IDS = "partition_ids" NUM_LAYERS_ATTEMPTED = "num_layers" NUM_TREES_ATTEMPTED = "num_trees" +NUM_USED_HANDLERS = "num_used_handlers" +USED_HANDLERS_MASK = "used_handlers_mask" _FEATURE_NAME_TEMPLATE = "%s_%d" @@ -70,7 +72,8 @@ def _get_column_by_index(tensor, indices): return array_ops.reshape(array_ops.gather(p_flat, i_flat), [shape[0], -1]) -def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats): +def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats, + used_handlers): """Returns predictions for the given logits and n_classes. 
Args: @@ -79,6 +82,8 @@ def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats): that contains predictions when no dropout was applied. partition_ids: A rank 1 `Tensor` with shape [batch_size]. ensemble_stats: A TreeEnsembleStatsOp result tuple. + used_handlers: A TreeEnsembleUsedHandlersOp result tuple of an int and a + boolean mask. Returns: A dict of predictions. @@ -89,6 +94,8 @@ def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats): result[PARTITION_IDS] = partition_ids result[NUM_LAYERS_ATTEMPTED] = ensemble_stats.attempted_layers result[NUM_TREES_ATTEMPTED] = ensemble_stats.attempted_trees + result[NUM_USED_HANDLERS] = used_handlers.num_used_handlers + result[USED_HANDLERS_MASK] = used_handlers.used_handlers_mask return result @@ -361,6 +368,13 @@ class GradientBoostedDecisionTreeModel(object): """ ensemble_stats = training_ops.tree_ensemble_stats(ensemble_handle, ensemble_stamp) + num_handlers = ( + len(self._dense_floats) + len(self._sparse_float_shapes) + + len(self._sparse_int_shapes)) + # Used during feature selection. + used_handlers = model_ops.tree_ensemble_used_handlers( + ensemble_handle, ensemble_stamp, num_all_handlers=num_handlers) + # We don't need dropout info - we can always restore it based on the # seed. apply_dropout, seed = _dropout_params(mode, ensemble_stats) @@ -395,7 +409,7 @@ class GradientBoostedDecisionTreeModel(object): use_locking=True) return _make_predictions_dict(ensemble_stamp, predictions, partition_ids, - ensemble_stats) + ensemble_stats, used_handlers) def predict(self, mode): """Returns predictions given the features and mode.
@@ -716,6 +730,22 @@ class GradientBoostedDecisionTreeModel(object): else: active_handlers = array_ops.ones([len(handlers), 2], dtype=dtypes.bool) + if self._learner_config.constraints.max_number_of_unique_feature_columns: + target = ( + self._learner_config.constraints.max_number_of_unique_feature_columns) + + def _feature_selection_active_handlers(): + # The active list for current and the next iteration. + used_handlers = array_ops.reshape(predictions_dict[USED_HANDLERS_MASK], + [-1, 1]) + used_handlers = array_ops.concat([used_handlers, used_handlers], axis=1) + return math_ops.logical_and(used_handlers, active_handlers) + + active_handlers = ( + control_flow_ops.cond(predictions_dict[NUM_USED_HANDLERS] >= target, + _feature_selection_active_handlers, + lambda: active_handlers)) + # Prepare empty gradients and hessians when handlers are not ready. empty_hess_shape = [1] + hessian_shape.as_list() empty_grad_shape = [1] + gradient_shape.as_list() diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index dba51d4f52..6411f57a54 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -47,6 +47,38 @@ def _squared_loss(label, unused_weights, predictions): return loss +def _append_to_leaf(leaf, c_id, w): + """Helper method for building tree leaves. + + Appends weight contributions for the given class index to a leaf node. + + Args: + leaf: leaf node to append to. + c_id: class Id for the weight update. + w: weight contribution value. + """ + leaf.sparse_vector.index.append(c_id) + leaf.sparse_vector.value.append(w) + + +def _set_float_split(split, feat_col, thresh, l_id, r_id): + """Helper method for building tree float splits. + + Sets split feature column, threshold and children. + + Args: + split: split node to update. 
+ feat_col: feature column for the split. + thresh: threshold to split on forming rule x <= thresh. + l_id: left child Id. + r_id: right child Id. + """ + split.feature_column = feat_col + split.threshold = thresh + split.left_id = l_id + split.right_id = r_id + + class GbdtTest(test_util.TensorFlowTestCase): def setUp(self): @@ -917,6 +949,350 @@ class GbdtTest(test_util.TensorFlowTestCase): output.trees[0].nodes[2].leaf.sparse_vector.value[0], atol=1e-4, rtol=1e-4) + def testTrainFnChiefFeatureSelectionReachedLimitNoGoodSplit(self): + """Tests the train function running on chief with feature selection.""" + with self.test_session() as sess: + ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=0, tree_ensemble_config="", name="tree_ensemble") + learner_config = learner_pb2.LearnerConfig() + learner_config.learning_rate_tuner.fixed.learning_rate = 0.1 + learner_config.num_classes = 2 + learner_config.regularization.l1 = 0 + learner_config.regularization.l2 = 0 + learner_config.constraints.max_tree_depth = 1 + learner_config.constraints.max_number_of_unique_feature_columns = 1 + learner_config.constraints.min_node_weight = 0 + features = {} + features["dense_float_0"] = array_ops.ones([4, 1], dtypes.float32) + # Feature 1 is predictive but it won't be used because we have reached the + # limit of num_used_handlers >= max_number_of_unique_feature_columns + features["dense_float_1"] = array_ops.constant([0, 0, 1, 1], + dtypes.float32) + + gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( + is_chief=True, + num_ps_replicas=0, + center_bias=False, + ensemble_handle=ensemble_handle, + examples_per_layer=1, + learner_config=learner_config, + logits_dimension=1, + features=features) + + predictions = array_ops.constant( + [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) + partition_ids = array_ops.zeros([4], dtypes.int32) + ensemble_stamp = variables.Variable( + initial_value=0, + name="ensemble_stamp", + trainable=False, + dtype=dtypes.int64) + + 
predictions_dict = { + "predictions": + predictions, + "predictions_no_dropout": + predictions, + "partition_ids": + partition_ids, + "ensemble_stamp": + ensemble_stamp, + "num_trees": + 12, + "num_used_handlers": + array_ops.constant(1, dtype=dtypes.int64), + "used_handlers_mask": + array_ops.constant([True, False], dtype=dtypes.bool), + } + + labels = array_ops.constant([0, 0, 1, 1], dtypes.float32) + weights = array_ops.ones([4, 1], dtypes.float32) + # Create train op. + train_op = gbdt_model.train( + loss=math_ops.reduce_mean( + _squared_loss(labels, weights, predictions)), + predictions_dict=predictions_dict, + labels=labels) + variables.global_variables_initializer().run() + resources.initialize_resources(resources.shared_resources()).run() + + # On first run, expect no splits to be chosen because the quantile + # buckets will not be ready. + train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + self.assertEquals(len(output.trees), 0) + self.assertEquals(len(output.tree_weights), 0) + self.assertEquals(stamp_token.eval(), 1) + + # Update the stamp to be able to run a second time. + sess.run([ensemble_stamp.assign_add(1)]) + + # On second run, expect a trivial split to be chosen to basically + # predict the average. 
+ train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + self.assertEquals(len(output.trees), 1) + self.assertAllClose(output.tree_weights, [0.1]) + self.assertEquals(stamp_token.eval(), 2) + expected_tree = """ + nodes { + dense_float_binary_split { + feature_column: 0 + threshold: 1.0 + left_id: 1 + right_id: 2 + } + node_metadata { + gain: 0 + } + } + nodes { + leaf { + vector { + value: -0.25 + } + } + } + nodes { + leaf { + vector { + value: 0.0 + } + } + }""" + self.assertProtoEquals(expected_tree, output.trees[0]) + + def testTrainFnChiefFeatureSelectionWithGoodSplits(self): + """Tests the train function running on chief with feature selection.""" + with self.test_session() as sess: + ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=0, tree_ensemble_config="", name="tree_ensemble") + learner_config = learner_pb2.LearnerConfig() + learner_config.learning_rate_tuner.fixed.learning_rate = 0.1 + learner_config.num_classes = 2 + learner_config.regularization.l1 = 0 + learner_config.regularization.l2 = 0 + learner_config.constraints.max_tree_depth = 1 + learner_config.constraints.max_number_of_unique_feature_columns = 1 + learner_config.constraints.min_node_weight = 0 + features = {} + features["dense_float_0"] = array_ops.ones([4, 1], dtypes.float32) + # Feature 1 is predictive and is in our selected features so it will be + # used even when we're at the limit. 
+ features["dense_float_1"] = array_ops.constant([0, 0, 1, 1], + dtypes.float32) + + gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( + is_chief=True, + num_ps_replicas=0, + center_bias=False, + ensemble_handle=ensemble_handle, + examples_per_layer=1, + learner_config=learner_config, + logits_dimension=1, + features=features) + + predictions = array_ops.constant( + [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) + partition_ids = array_ops.zeros([4], dtypes.int32) + ensemble_stamp = variables.Variable( + initial_value=0, + name="ensemble_stamp", + trainable=False, + dtype=dtypes.int64) + + predictions_dict = { + "predictions": + predictions, + "predictions_no_dropout": + predictions, + "partition_ids": + partition_ids, + "ensemble_stamp": + ensemble_stamp, + "num_trees": + 12, + "num_used_handlers": + array_ops.constant(1, dtype=dtypes.int64), + "used_handlers_mask": + array_ops.constant([False, True], dtype=dtypes.bool), + } + + labels = array_ops.constant([0, 0, 1, 1], dtypes.float32) + weights = array_ops.ones([4, 1], dtypes.float32) + # Create train op. + train_op = gbdt_model.train( + loss=math_ops.reduce_mean( + _squared_loss(labels, weights, predictions)), + predictions_dict=predictions_dict, + labels=labels) + variables.global_variables_initializer().run() + resources.initialize_resources(resources.shared_resources()).run() + + # On first run, expect no splits to be chosen because the quantile + # buckets will not be ready. + train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + self.assertEquals(len(output.trees), 0) + self.assertEquals(len(output.tree_weights), 0) + self.assertEquals(stamp_token.eval(), 1) + + # Update the stamp to be able to run a second time. 
+ sess.run([ensemble_stamp.assign_add(1)]) + + train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + + self.assertEquals(len(output.trees), 1) + self.assertAllClose(output.tree_weights, [0.1]) + self.assertEquals(stamp_token.eval(), 2) + expected_tree = """ + nodes { + dense_float_binary_split { + feature_column: 1 + left_id: 1 + right_id: 2 + } + node_metadata { + gain: 0.5 + } + } + nodes { + leaf { + vector { + value: 0.0 + } + } + } + nodes { + leaf { + vector { + value: -0.5 + } + } + }""" + self.assertProtoEquals(expected_tree, output.trees[0]) + + def testTrainFnChiefFeatureSelectionReachedLimitIncrementAttemptedLayer(self): + """Tests the train function running on chief with feature selection.""" + with self.test_session() as sess: + tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() + tree = tree_ensemble_config.trees.add() + + _set_float_split(tree.nodes.add() + .sparse_float_binary_split_default_right.split, 2, 4.0, + 1, 2) + _append_to_leaf(tree.nodes.add().leaf, 0, 0.5) + _append_to_leaf(tree.nodes.add().leaf, 1, 1.2) + tree_ensemble_config.tree_weights.append(1.0) + metadata = tree_ensemble_config.tree_metadata.add() + metadata.is_finalized = False + metadata.num_layers_grown = 1 + tree_ensemble_config = tree_ensemble_config.SerializeToString() + ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=0, tree_ensemble_config=tree_ensemble_config, + name="tree_ensemble") + learner_config = learner_pb2.LearnerConfig() + learner_config.learning_rate_tuner.fixed.learning_rate = 0.1 + learner_config.num_classes = 2 + learner_config.regularization.l1 = 0 + learner_config.regularization.l2 = 0 + learner_config.constraints.max_tree_depth = 1 + learner_config.constraints.max_number_of_unique_feature_columns = 1 + learner_config.constraints.min_node_weight = 0 + features = {} + # Both 
features will be disabled since the feature selection limit is + # already reached. + features["dense_float_0"] = array_ops.ones([4, 1], dtypes.float32) + features["dense_float_1"] = array_ops.constant([0, 0, 1, 1], + dtypes.float32) + + gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( + is_chief=True, + num_ps_replicas=0, + center_bias=False, + ensemble_handle=ensemble_handle, + examples_per_layer=1, + learner_config=learner_config, + logits_dimension=1, + features=features) + + predictions = array_ops.constant( + [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) + partition_ids = array_ops.zeros([4], dtypes.int32) + ensemble_stamp = variables.Variable( + initial_value=0, + name="ensemble_stamp", + trainable=False, + dtype=dtypes.int64) + + predictions_dict = { + "predictions": + predictions, + "predictions_no_dropout": + predictions, + "partition_ids": + partition_ids, + "ensemble_stamp": + ensemble_stamp, + "num_trees": + 12, + # We have somehow reached our limit 1. Both of the handlers will be + # disabled. + "num_used_handlers": + array_ops.constant(1, dtype=dtypes.int64), + "used_handlers_mask": + array_ops.constant([False, False], dtype=dtypes.bool), + } + + labels = array_ops.constant([0, 0, 1, 1], dtypes.float32) + weights = array_ops.ones([4, 1], dtypes.float32) + # Create train op. + train_op = gbdt_model.train( + loss=math_ops.reduce_mean( + _squared_loss(labels, weights, predictions)), + predictions_dict=predictions_dict, + labels=labels) + variables.global_variables_initializer().run() + resources.initialize_resources(resources.shared_resources()).run() + + # On first run, expect no splits to be chosen because the quantile + # buckets will not be ready. 
+ train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + self.assertEquals(len(output.trees), 1) + self.assertEquals(output.growing_metadata.num_layers_attempted, 1) + self.assertEquals(stamp_token.eval(), 1) + + # Update the stamp to be able to run a second time. + sess.run([ensemble_stamp.assign_add(1)]) + + train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + # Make sure the trees are not modified, but the num_layers_attempted is + # incremented so that eventually the training stops. + self.assertEquals(len(output.trees), 1) + self.assertEquals(len(output.trees[0].nodes), 3) + + self.assertEquals(output.growing_metadata.num_layers_attempted, 2) if __name__ == "__main__": googletest.main() -- GitLab From f8f4a6e26cc1108495c0b9a55d9a7d6e7005c2b5 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Thu, 1 Mar 2018 14:15:20 -0800 Subject: [PATCH 221/311] Internal change. 
PiperOrigin-RevId: 187532378 --- tensorflow/c/c_test_util.cc | 31 +++++++++++++++++-- tensorflow/c/c_test_util.h | 9 ++++++ .../common_runtime/graph_execution_state.cc | 4 +++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc index 3db2852ce6..53346a8cdf 100644 --- a/tensorflow/c/c_test_util.cc +++ b/tensorflow/c/c_test_util.cc @@ -34,6 +34,10 @@ static void DoubleDeallocator(void* data, size_t, void* arg) { delete[] static_cast(data); } +static void FloatDeallocator(void* data, size_t, void* arg) { + delete[] static_cast(data); +} + TF_Tensor* Int8Tensor(const int64_t* dims, int num_dims, const char* values) { int64_t num_values = 1; for (int i = 0; i < num_dims; ++i) { @@ -78,13 +82,21 @@ TF_Tensor* DoubleTensor(double v) { &DoubleDeallocator, nullptr); } +TF_Tensor* FloatTensor(float v) { + const int num_bytes = sizeof(float); + float* values = new float[1]; + values[0] = v; + return TF_NewTensor(TF_FLOAT, nullptr, 0, values, num_bytes, + &FloatDeallocator, nullptr); +} + // All the *Helper methods are used as a workaround for the restrictions that // one cannot call ASSERT_* methods in non-void-returning functions (when // exceptions are disabled during compilation) void PlaceholderHelper(TF_Graph* graph, TF_Status* s, const char* name, - TF_Operation** op) { + TF_DataType dtype, TF_Operation** op) { TF_OperationDescription* desc = TF_NewOperation(graph, "Placeholder", name); - TF_SetAttrType(desc, "dtype", TF_INT32); + TF_SetAttrType(desc, "dtype", dtype); *op = TF_FinishOperation(desc, s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); ASSERT_NE(*op, nullptr); @@ -92,7 +104,14 @@ void PlaceholderHelper(TF_Graph* graph, TF_Status* s, const char* name, TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name) { TF_Operation* op; - PlaceholderHelper(graph, s, name, &op); + PlaceholderHelper(graph, s, name, TF_INT32, &op); + return op; +} + +TF_Operation* 
PlaceholderFloat(TF_Graph* graph, TF_Status* s, + const char* name) { + TF_Operation* op; + PlaceholderHelper(graph, s, name, TF_FLOAT, &op); return op; } @@ -126,6 +145,12 @@ TF_Operation* ScalarConst(double v, TF_Graph* graph, TF_Status* s, return Const(tensor.get(), graph, s, name); } +TF_Operation* ScalarConst(float v, TF_Graph* graph, TF_Status* s, + const char* name) { + unique_tensor_ptr tensor(FloatTensor(v), TF_DeleteTensor); + return Const(tensor.get(), graph, s, name); +} + void AddOpHelper(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name, TF_Operation** op, bool check) { diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h index 2a70177c72..8cf060f73f 100644 --- a/tensorflow/c/c_test_util.h +++ b/tensorflow/c/c_test_util.h @@ -44,8 +44,14 @@ TF_Tensor* Int32Tensor(int32_t v); TF_Tensor* DoubleTensor(double v); +TF_Tensor* FloatTensor(float v); + +// TODO(hongm): Change Placeholder() to take in a TF_DataType parameter, and +// unify with PlaceholderFloat. 
TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name = "feed"); +TF_Operation* PlaceholderFloat(TF_Graph* graph, TF_Status* s, + const char* name = "feed"); TF_Operation* Const(TF_Tensor* t, TF_Graph* graph, TF_Status* s, const char* name = "const"); @@ -56,6 +62,9 @@ TF_Operation* ScalarConst(int32_t v, TF_Graph* graph, TF_Status* s, TF_Operation* ScalarConst(double v, TF_Graph* graph, TF_Status* s, const char* name = "scalar"); +TF_Operation* ScalarConst(float v, TF_Graph* graph, TF_Status* s, + const char* name = "scalar"); + TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name = "add"); diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc index 33a5d60eb7..785ec3d227 100644 --- a/tensorflow/core/common_runtime/graph_execution_state.cc +++ b/tensorflow/core/common_runtime/graph_execution_state.cc @@ -73,6 +73,10 @@ GraphExecutionState::~GraphExecutionState() { /* static */ Status GraphExecutionState::MakeForBaseGraph( GraphDef* graph_def, const GraphExecutionStateOptions& options, std::unique_ptr* out_state) { +#ifndef __ANDROID__ + VLOG(1) << "Graph proto is " << graph_def->DebugString(); +#endif // __ANDROID__ + std::unique_ptr ret( new GraphExecutionState(graph_def, options)); -- GitLab From 3973e772ed84db08cb86b1086558223af29fd64a Mon Sep 17 00:00:00 2001 From: Rui Zhao Date: Thu, 1 Mar 2018 14:15:23 -0800 Subject: [PATCH 222/311] Sampling group embeddings for each child. 
PiperOrigin-RevId: 187532388 --- .../grappler/hierarchical_controller.py | 41 ++++++++++++++----- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/grappler/hierarchical_controller.py b/tensorflow/python/grappler/hierarchical_controller.py index b06fb3c6d0..c0866c1069 100644 --- a/tensorflow/python/grappler/hierarchical_controller.py +++ b/tensorflow/python/grappler/hierarchical_controller.py @@ -258,9 +258,11 @@ class HierarchicalController(Controller): "attn_w_2", [self.hparams.hidden_size, self.hparams.hidden_size]) variable_scope.get_variable("attn_v", [self.hparams.hidden_size, 1]) seq2seq_input_layer = array_ops.placeholder_with_default( - array_ops.zeros([1, self.num_groups, self.group_emb_size], + array_ops.zeros([self.hparams.num_children, + self.num_groups, + self.group_emb_size], dtypes.float32), - shape=(1, self.num_groups, self.group_emb_size)) + shape=(self.hparams.num_children, self.num_groups, self.group_emb_size)) self.seq2seq_input_layer = seq2seq_input_layer def compute_reward(self, run_time): @@ -585,12 +587,29 @@ class HierarchicalController(Controller): """Approximating the blocks of a TF graph from a graph_def. Args: - grouping_actions: grouping predictions + grouping_actions: grouping predictions. verbose: print stuffs. Returns: groups: list of groups. """ + groups = [ + self._create_group_embeddings(grouping_actions, i, verbose) for + i in range(self.hparams.num_children) + ] + return np.stack(groups, axis=0) + + def _create_group_embeddings(self, grouping_actions, child_id, verbose=False): + """Approximating the blocks of a TF graph from a graph_def for each child. + + Args: + grouping_actions: grouping predictions. + child_id: child_id for the group. + verbose: print stuffs. + + Returns: + groups: group embedding for the child_id. 
+ """ if verbose: print("Processing input_graph") @@ -599,13 +618,13 @@ class HierarchicalController(Controller): dag_matrix = np.zeros([self.num_groups, self.num_groups], dtype=np.float32) for op in self.important_ops: topo_op_index = self.name_to_topo_order_index[op.name] - # TODO(agoldie) child_id - group_index = grouping_actions[0][topo_op_index] + group_index = grouping_actions[child_id][topo_op_index] for output_op in self.get_node_fanout(op): if output_op.name not in self.important_op_names: continue - output_group_index = grouping_actions[0][self.name_to_topo_order_index[ - output_op.name]] + output_group_index = ( + grouping_actions[child_id][self.name_to_topo_order_index[ + output_op.name]]) dag_matrix[group_index, output_group_index] += 1.0 num_connections = np.sum(dag_matrix) num_intra_group_connections = dag_matrix.trace() @@ -648,7 +667,8 @@ class HierarchicalController(Controller): ], dtype=np.float32) for op_index, op in enumerate(self.important_ops): - group_index = grouping_actions[0][self.name_to_topo_order_index[op.name]] + group_index = grouping_actions[child_id][ + self.name_to_topo_order_index[op.name]] type_name = str(op.op) type_index = self.type_dict[type_name] group_embedding[group_index, type_index] += 1 @@ -675,7 +695,7 @@ class HierarchicalController(Controller): shape=[num_children, self.num_groups], trainable=False) - x = array_ops.tile(self.seq2seq_input_layer, [num_children, 1, 1]) + x = self.seq2seq_input_layer last_c, last_h, attn_mem = self.encode(x) actions, log_probs = {}, {} actions["sample"], log_probs["sample"] = ( @@ -988,8 +1008,7 @@ class HierarchicalController(Controller): def generate_placement(self, grouping, sess): controller_ops = self.ops["controller"] feed_seq2seq_input_dict = {} - feed_seq2seq_input_dict[self.seq2seq_input_layer] = np.expand_dims( - grouping, axis=0) + feed_seq2seq_input_dict[self.seq2seq_input_layer] = grouping sess.run( controller_ops["y_preds"]["sample"], feed_dict=feed_seq2seq_input_dict) -- 
GitLab From 759da7754a708f1f64e4b4b2e17cd4d8c42e3ed3 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 1 Mar 2018 14:26:07 -0800 Subject: [PATCH 223/311] Set more generated ops to 'hidden'. These ops have not been hidden before but instead have corresponding definitions in Python files. We don't want tf_export decorators for the generated ops since corresponding Python ops have tf_export decorators instead. PiperOrigin-RevId: 187534113 --- tensorflow/core/api_def/python_api/api_def_Angle.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Bincount.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Cast.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Cumprod.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Cumsum.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_DepthToSpace.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Gather.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Imag.pbtxt | 4 ++++ .../api_def/python_api/api_def_IsVariableInitialized.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Multinomial.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_OnesLike.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ParseSingleExample.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_QuantizeV2.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Real.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_ReduceJoin.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ReverseSequence.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Shape.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Size.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_SpaceToDepth.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_SparseSegmentMean.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_SparseSegmentSqrtN.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_SparseSegmentSum.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_StridedSlice.pbtxt | 4 ++++ 
tensorflow/core/api_def/python_api/api_def_Transpose.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Where.pbtxt | 4 ++++ 25 files changed, 100 insertions(+) create mode 100644 tensorflow/core/api_def/python_api/api_def_Angle.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Bincount.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Cast.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Cumprod.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Cumsum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DepthToSpace.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Gather.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Imag.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IsVariableInitialized.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Multinomial.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OnesLike.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ParseSingleExample.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Real.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ReduceJoin.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ReverseSequence.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Shape.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Size.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SpaceToDepth.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentMean.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtN.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentSum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StridedSlice.pbtxt create mode 
100644 tensorflow/core/api_def/python_api/api_def_Transpose.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Where.pbtxt diff --git a/tensorflow/core/api_def/python_api/api_def_Angle.pbtxt b/tensorflow/core/api_def/python_api/api_def_Angle.pbtxt new file mode 100644 index 0000000000..771e861fd1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Angle.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Angle" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Bincount.pbtxt b/tensorflow/core/api_def/python_api/api_def_Bincount.pbtxt new file mode 100644 index 0000000000..551b51db26 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Bincount.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Bincount" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Cast.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cast.pbtxt new file mode 100644 index 0000000000..428aa62c46 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Cast.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Cast" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Cumprod.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cumprod.pbtxt new file mode 100644 index 0000000000..8f5e2f061b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Cumprod.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Cumprod" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Cumsum.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cumsum.pbtxt new file mode 100644 index 0000000000..715f26fcac --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Cumsum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Cumsum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DepthToSpace.pbtxt b/tensorflow/core/api_def/python_api/api_def_DepthToSpace.pbtxt new file mode 100644 index 0000000000..fd0766b365 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_DepthToSpace.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DepthToSpace" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Gather.pbtxt b/tensorflow/core/api_def/python_api/api_def_Gather.pbtxt new file mode 100644 index 0000000000..5f956930e0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Gather.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Gather" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Imag.pbtxt b/tensorflow/core/api_def/python_api/api_def_Imag.pbtxt new file mode 100644 index 0000000000..5632fd4365 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Imag.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Imag" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IsVariableInitialized.pbtxt b/tensorflow/core/api_def/python_api/api_def_IsVariableInitialized.pbtxt new file mode 100644 index 0000000000..6a7b078909 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IsVariableInitialized.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IsVariableInitialized" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Multinomial.pbtxt b/tensorflow/core/api_def/python_api/api_def_Multinomial.pbtxt new file mode 100644 index 0000000000..9b65433580 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Multinomial.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Multinomial" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OnesLike.pbtxt b/tensorflow/core/api_def/python_api/api_def_OnesLike.pbtxt new file mode 100644 index 0000000000..c058e5b1ab --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OnesLike.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OnesLike" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ParseSingleExample.pbtxt b/tensorflow/core/api_def/python_api/api_def_ParseSingleExample.pbtxt 
new file mode 100644 index 0000000000..4193bdd091 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ParseSingleExample.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ParseSingleExample" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeV2.pbtxt new file mode 100644 index 0000000000..40673234ed --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizeV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizeV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Real.pbtxt b/tensorflow/core/api_def/python_api/api_def_Real.pbtxt new file mode 100644 index 0000000000..52a9089f4a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Real.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Real" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ReduceJoin.pbtxt b/tensorflow/core/api_def/python_api/api_def_ReduceJoin.pbtxt new file mode 100644 index 0000000000..0fde5942ab --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ReduceJoin.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ReduceJoin" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ReverseSequence.pbtxt b/tensorflow/core/api_def/python_api/api_def_ReverseSequence.pbtxt new file mode 100644 index 0000000000..f3fc2578df --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ReverseSequence.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ReverseSequence" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Shape.pbtxt b/tensorflow/core/api_def/python_api/api_def_Shape.pbtxt new file mode 100644 index 0000000000..bd7b5ad36c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Shape.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Shape" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Size.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_Size.pbtxt new file mode 100644 index 0000000000..7f76173a5d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Size.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Size" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SpaceToDepth.pbtxt b/tensorflow/core/api_def/python_api/api_def_SpaceToDepth.pbtxt new file mode 100644 index 0000000000..d56a7384eb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SpaceToDepth.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SpaceToDepth" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentMean.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMean.pbtxt new file mode 100644 index 0000000000..f12c2e2073 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMean.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentMean" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtN.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtN.pbtxt new file mode 100644 index 0000000000..7daaa81482 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtN.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentSqrtN" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentSum.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSum.pbtxt new file mode 100644 index 0000000000..e7028efce2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentSum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StridedSlice.pbtxt b/tensorflow/core/api_def/python_api/api_def_StridedSlice.pbtxt new file mode 100644 index 0000000000..a55fa98877 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StridedSlice.pbtxt @@ -0,0 +1,4 @@ +op { 
+ graph_op_name: "StridedSlice" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Transpose.pbtxt b/tensorflow/core/api_def/python_api/api_def_Transpose.pbtxt new file mode 100644 index 0000000000..e22b6a040e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Transpose.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Transpose" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Where.pbtxt b/tensorflow/core/api_def/python_api/api_def_Where.pbtxt new file mode 100644 index 0000000000..d4dd25a206 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Where.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Where" + visibility: HIDDEN +} -- GitLab From a8bcf9c5b2ea7c88c3034d1b4c5d62c209a6b431 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 14:35:44 -0800 Subject: [PATCH 224/311] Expose native inference latency via TFlite interpreter. PiperOrigin-RevId: 187535695 --- .../main/java/org/tensorflow/lite/Interpreter.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index 9286814b74..b071cda5df 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -167,6 +167,19 @@ public final class Interpreter implements AutoCloseable { return wrapper.getOutputIndex(opName); } + + /** + * Returns native inference timing. + *

IllegalArgumentException will be thrown if the model is not initialized by the + * {@link Interpreter}. + */ + public Long getLastNativeInferenceDurationNanoseconds() { + if (wrapper == null) { + throw new IllegalStateException("The interpreter has already been closed."); + } + return wrapper.getLastNativeInferenceDurationNanoseconds(); + } + /** Release resources associated with the {@code Interpreter}. */ @Override public void close() { -- GitLab From 8b10f9c7a0a67282061275302b00c254b609b7f6 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 1 Mar 2018 14:49:49 -0800 Subject: [PATCH 225/311] EagerTensor.device reflects the op's device and not the tensor's memory space. This matches graph mode's behavior. PiperOrigin-RevId: 187537818 --- tensorflow/c/eager/c_api.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 252ceab54a..4b619dc4e1 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -180,12 +180,10 @@ int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index, } const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h, TF_Status* status) { - // TODO(apassos) this will be potentially incorrect in the distributed case as - // our local device will have a name which depends on the ClusterSpec and - // hence will require the context to resolve. status->status = tensorflow::Status::OK(); - return (h->d == nullptr) ? "/job:localhost/replica:0/task:0/device:CPU:0" - : h->d->name().c_str(); + return (h->op_device == nullptr) + ? 
"/job:localhost/replica:0/task:0/device:CPU:0" + : h->op_device->name().c_str(); } TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) { -- GitLab From 4d1a2894b7faa7d9576e82e291758c0da0616b47 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 1 Mar 2018 15:09:23 -0800 Subject: [PATCH 226/311] Added support for optimization of functions with fixed input/output types PiperOrigin-RevId: 187540982 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/function_optimizer.cc | 35 +++++--- .../optimizers/function_optimizer_test.cc | 87 +++++++++++++++++++ .../grappler/optimizers/meta_optimizer.cc | 2 +- 4 files changed, 111 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index b8995ef365..037438ee75 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -144,6 +144,7 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/utils:functions", ], ) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index ba8a76ad5f..a5cf00c155 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/functions.h" namespace tensorflow { @@ -53,13 +54,17 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, AttrValue::ListValue* type_list = (*func_inputs->mutable_attr())["T"].mutable_list(); for (const OpDef::ArgDef& arg : func.signature().input_arg()) { - auto it = attr.find(arg.type_attr()); - if (it == attr.end()) { - return errors::InvalidArgument("Invalid input argument ", arg.name(), - " for function ", node.op(), - " instantiated by ", node.name()); + if (arg.type() != DT_INVALID) { + type_list->add_type(arg.type()); + } else { + auto it = attr.find(arg.type_attr()); + if (it == attr.end()) { + return errors::InvalidArgument("Invalid input argument ", arg.name(), + " for function ", node.op(), + " instantiated by ", node.name()); + } + type_list->add_type(it->second.type()); } - type_list->add_type(it->second.type()); } for (NodeDef& func_body_node : *item->graph.mutable_node()) { @@ -75,7 +80,7 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, } else { // Update the input names. 
for (string& input : *func_body_node.mutable_input()) { - input = strings::StrCat(node.name(), "/", input); + input = AddPrefixToNodeName(input, node.name()); } } @@ -98,13 +103,17 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, func_outputs->set_device(node.device()); type_list = (*func_outputs->mutable_attr())["T"].mutable_list(); for (const OpDef::ArgDef& arg : func.signature().output_arg()) { - auto it = attr.find(arg.type_attr()); - if (it == attr.end()) { - return errors::InvalidArgument("Invalid output argument ", arg.name(), - " for function ", node.op(), - " instantiated by ", node.name()); + if (arg.type() != DT_INVALID) { + type_list->add_type(arg.type()); + } else { + auto it = attr.find(arg.type_attr()); + if (it == attr.end()) { + return errors::InvalidArgument("Invalid output argument ", arg.name(), + " for function ", node.op(), + " instantiated by ", node.name()); + } + type_list->add_type(it->second.type()); } - type_list->add_type(it->second.type()); func_outputs->add_input(strings::StrCat(node.name(), "/", arg.name())); } diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index 76a5c08d35..fd61c067ed 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -100,6 +100,93 @@ TEST_F(FunctionOptimizerTest, SimpleFunction) { test::ExpectTensorEqual(tensors_expected[0], tensors[0]); } +TEST_F(FunctionOptimizerTest, FixedTypeFunction) { + // Create and instantiate a version of the XTimesTwo function that only + // accepts floats a inputs. 
+ const Tensor kTwo = test::AsScalar(2.0f); + FunctionDef x_times_two = FunctionDefHelper::Define( + // Name + "XTimesTwo", + // Args + {"x: float"}, + // Return values + {"y: float"}, + // Attr def + {}, + // Nodes + { + {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}}, + {{"y"}, "Mul", {"x", "two"}, {{"T", DT_FLOAT}}}, + }); + + constexpr char device[] = "/device:CPU:0"; + GrapplerItem item; + item.graph = test::function::GDef( + {test::function::NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("y", "XTimesTwo", {"x"}, {}, device), + test::function::NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, device)}, + // FunctionLib + { + x_times_two, + }); + + FunctionOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + int count = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "y/inlined_inputs") { + count++; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("x", node.input(0)); + } else if (node.name() == "y/x") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/inlined_inputs:0", node.input(0)); + } else if (node.name() == "y/two") { + count++; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(device, node.device()); + } else if (node.name() == "y/y") { + count++; + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("y/x", node.input(0)); + EXPECT_EQ("y/two:0", node.input(1)); + } else if (node.name() == "y") { + count++; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/y", node.input(0)); + } else if (node.name() == "z") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + 
EXPECT_EQ("y", node.input(0)); + } + } + EXPECT_EQ(6, count); + + item.fetch = {"z"}; + Tensor pi(DT_FLOAT, {}); + pi.flat()(0) = 3.14f; + item.feed.emplace_back("x", pi); + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index b674ee1553..72d7b94dc8 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -136,7 +136,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, } } else { const std::set available_optimizers = { - "pruning", "constfold", "layout", "memory", + "pruning", "function", "constfold", "layout", "memory", "autoparallel", "arithmetic", "dependency", "loop"}; std::vector custom_optimizer_names; for (const auto& optimizer_name : cfg_.optimizers()) { -- GitLab From ac79486324bda04cc2f3b75e9590935dfe1ef826 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 1 Mar 2018 15:36:19 -0800 Subject: [PATCH 227/311] Checkpointable: Make Model Checkpointable-compatible Has Models track Checkpointable dependencies with __setattr__. Switches subclassed Models to creating ResourceVariables by default, which removes one source of eager/graph differences. tfe.Network was doing this by default. This is necessary for eager/graph agnostic code since tapes currently only work with ResourceVariables. It's not quite trivial to fix that, and ResourceVariables by default in more places is a Good Thing anyway. (Not that we shouldn't also fix the tape code.) 
PiperOrigin-RevId: 187544850 --- tensorflow/contrib/eager/python/BUILD | 1 + .../eager/python/checkpointable_utils_test.py | 154 ++++++++---------- .../keras/_impl/keras/engine/network.py | 8 + .../keras/_impl/keras/engine/training.py | 9 + 4 files changed, 86 insertions(+), 86 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index a26ec8513f..8c4b0827fd 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -262,6 +262,7 @@ py_test( "//tensorflow/python:variables", "//tensorflow/python/eager:context", "//tensorflow/python/eager:test", + "//tensorflow/python/keras", "@six_archive//:six", ], ) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 7367f1b71c..9424de0835 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -22,7 +22,6 @@ import os import six from tensorflow.contrib.eager.python import checkpointable_utils -from tensorflow.contrib.eager.python import network as network_lib from tensorflow.python.client import session as session_lib from tensorflow.python.eager import context from tensorflow.python.eager import test @@ -30,7 +29,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.layers import base +from tensorflow.python.keras._impl.keras.engine import training from tensorflow.python.layers import core from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops @@ -42,21 +41,6 @@ from tensorflow.python.training import saver as core_saver from tensorflow.python.training import training_util -# pylint: disable=not-callable -class CheckpointableNetwork(network_lib.Network, 
checkpointable.Checkpointable): - - def __setattr__(self, name, value): - if isinstance(value, base.Layer): - self.track_layer(value, name=name) - # Checkpointable is next in the method resolution order, so this will catch - # Checkpointable objects which aren't Layers. - super(CheckpointableNetwork, self).__setattr__(name, value) - - def track_layer(self, layer, name): - self._track_checkpointable(layer, name=name) - return super(CheckpointableNetwork, self).track_layer(layer) - - class NonLayerCheckpointable(checkpointable.Checkpointable): def __init__(self): @@ -65,19 +49,20 @@ class NonLayerCheckpointable(checkpointable.Checkpointable): self, name="a_variable", shape=[]) -class MyNetwork(CheckpointableNetwork): - """A concrete Network for testing.""" +# pylint: disable=not-callable +class MyModel(training.Model): + """A concrete Model for testing.""" def __init__(self): - super(MyNetwork, self).__init__() + super(MyModel, self).__init__() self._named_dense = core.Dense(1, use_bias=True) - self._via_track_layer = self.track_layer( - core.Dense(1, use_bias=False), name="via_track_layer") + self._second = core.Dense(1, use_bias=False) # We can still track Checkpointables which aren't Layers. self._non_layer = NonLayerCheckpointable() def call(self, values): - return self._via_track_layer(self._named_dense(values)) + ret = self._second(self._named_dense(values)) + return ret class InterfaceTests(test.TestCase): @@ -171,26 +156,26 @@ class CheckpointingTests(test.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testNamingWithOptimizer(self): input_value = constant_op.constant([[3.]]) - network = MyNetwork() - # A nuisance Network using the same optimizer. Its slot variables should not + model = MyModel() + # A nuisance Model using the same optimizer. Its slot variables should not # go in the checkpoint, since it is never depended on. 
- other_network = MyNetwork() + other_model = MyModel() optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_checkpointable = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network, optimizer_step=optimizer_step) + optimizer=optimizer, model=model, optimizer_step=optimizer_step) if context.in_eager_mode(): optimizer.minimize( - lambda: network(input_value), + lambda: model(input_value), global_step=optimizer_step) optimizer.minimize( - lambda: other_network(input_value), + lambda: other_model(input_value), global_step=optimizer_step) else: train_op = optimizer.minimize( - network(input_value), global_step=optimizer_step) + model(input_value), global_step=optimizer_step) optimizer.minimize( - other_network(input_value), + other_model(input_value), global_step=optimizer_step) self.evaluate(checkpointable_utils.gather_initializers( root_checkpointable)) @@ -200,24 +185,21 @@ class CheckpointingTests(test.TestCase): expected_checkpoint_names = ( # Created in the root node, so no prefix. "optimizer_step", - # No name provided to track_checkpointable(), so the position is used - # instead (one-based). 
- "network/via_track_layer/kernel", - # track_checkpointable() with a name provided, so that's used - "network/_named_dense/kernel", - "network/_named_dense/bias", - # non-Layer dependency of the network - "network/_non_layer/a_variable", + "model/_second/kernel", + "model/_named_dense/kernel", + "model/_named_dense/bias", + # non-Layer dependency of the model + "model/_non_layer/a_variable", # The optimizer creates two non-slot variables "optimizer/beta1_power", "optimizer/beta2_power", # Slot variables - "network/via_track_layer/kernel/.OPTIMIZER_SLOT/optimizer/m", - "network/via_track_layer/kernel/.OPTIMIZER_SLOT/optimizer/v", - "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", - "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", - "network/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", - "network/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", + "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", + "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", + "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", + "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", ) suffix = "/.ATTRIBUTES/VARIABLE_VALUE" expected_checkpoint_names = [ @@ -229,11 +211,11 @@ class CheckpointingTests(test.TestCase): "global_step:0", named_variables["optimizer_step" + suffix].name) self.assertEqual( - "my_network/dense_1/kernel:0", - named_variables["network/via_track_layer/kernel" + suffix].name) + "my_model/dense_1/kernel:0", + named_variables["model/_second/kernel" + suffix].name) self.assertEqual( - "my_network/dense/kernel:0", - named_variables["network/_named_dense/kernel" + suffix].name) + "my_model/dense/kernel:0", + named_variables["model/_named_dense/kernel" + suffix].name) self.assertEqual( "beta1_power:0", named_variables["optimizer/beta1_power" + suffix].name) @@ -251,80 +233,80 @@ class CheckpointingTests(test.TestCase): 
serialized_graph.nodes[optimizer_node.children[0].node_id] .attributes[0].full_name) self.assertEqual( - "my_network/dense/kernel", + "my_model/dense/kernel", serialized_graph.nodes[optimizer_node.slot_variables[0] .original_variable_node_id] .attributes[0].full_name) # We strip off the :0 suffix, as variable.name-based saving does. self.assertEqual( - "my_network/dense/kernel/Adam", + "my_model/dense/kernel/Adam", serialized_graph.nodes[optimizer_node.slot_variables[0] .slot_variable_node_id] .attributes[0].full_name) self.assertEqual( - "my_network/dense/kernel/Adam:0", + "my_model/dense/kernel/Adam:0", optimizer.get_slot( - var=named_variables["network/_named_dense/kernel" + suffix], + var=named_variables["model/_named_dense/kernel" + suffix], name="m").name) self.assertEqual( - "network/_named_dense/kernel" + suffix, + "model/_named_dense/kernel" + suffix, serialized_graph.nodes[ optimizer_node.slot_variables[0] .original_variable_node_id].attributes[0].checkpoint_key) self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) self.assertEqual( - "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, serialized_graph.nodes[ optimizer_node.slot_variables[0] .slot_variable_node_id].attributes[0].checkpoint_key) @test_util.run_in_graph_and_eager_modes() def testSaveRestore(self): - network = MyNetwork() + model = MyModel() optimizer = adam.AdamOptimizer(0.001) root_checkpointable = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network) + optimizer=optimizer, model=model) input_value = constant_op.constant([[3.]]) if context.in_eager_mode(): optimizer.minimize( - lambda: network(input_value)) + lambda: model(input_value)) else: - train_op = optimizer.minimize(network(input_value)) + train_op = optimizer.minimize(model(input_value)) # TODO(allenl): Make initialization more pleasant when graph building. 
root_checkpointable.save_counter # pylint: disable=pointless-statement self.evaluate(checkpointable_utils.gather_initializers( root_checkpointable)) self.evaluate(train_op) prefix = os.path.join(self.get_temp_dir(), "ckpt") - self.evaluate(state_ops.assign(network._named_dense.variables[1], [42.])) - m_bias_slot = optimizer.get_slot(network._named_dense.variables[1], "m") + self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.])) + m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") self.evaluate(state_ops.assign(m_bias_slot, [1.5])) save_path = root_checkpointable.save(file_prefix=prefix) - self.evaluate(state_ops.assign(network._named_dense.variables[1], [43.])) + self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.])) self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3)) optimizer_variables = self.evaluate(optimizer.variables()) self.evaluate(state_ops.assign(m_bias_slot, [-2.])) # Immediate restoration status = root_checkpointable.restore(save_path=save_path).assert_consumed() status.run_restore_ops() - self.assertAllEqual([42.], self.evaluate(network._named_dense.variables[1])) + self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter)) self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) if context.in_graph_mode(): return # Restore-on-create is only supported when executing eagerly - on_create_network = MyNetwork() + on_create_model = MyModel() on_create_optimizer = adam.AdamOptimizer(0.001) on_create_root = checkpointable_utils.Checkpoint( - optimizer=on_create_optimizer, network=on_create_network) + optimizer=on_create_optimizer, model=on_create_model) # Deferred restoration status = on_create_root.restore(save_path=save_path) - on_create_network(constant_op.constant([[3.]])) # create variables + on_create_model(constant_op.constant([[3.]])) # create variables self.assertAllEqual(1, 
self.evaluate(on_create_root.save_counter)) self.assertAllEqual([42.], self.evaluate( - on_create_network._named_dense.variables[1])) + on_create_model._named_dense.variables[1])) on_create_m_bias_slot = on_create_optimizer.get_slot( - on_create_network._named_dense.variables[1], "m") + on_create_model._named_dense.variables[1], "m") # Optimizer slot variables are created when the original variable is # restored. self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) @@ -344,17 +326,17 @@ class CheckpointingTests(test.TestCase): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): - network = MyNetwork() + model = MyModel() optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network, + optimizer=optimizer, model=model, optimizer_step=training_util.get_or_create_global_step()) root.restore(core_saver.latest_checkpoint(checkpoint_directory)) for _ in range(num_training_steps): # TODO(allenl): Use a Dataset and serialize/checkpoint it. 
input_value = constant_op.constant([[3.]]) optimizer.minimize( - lambda: network(input_value), # pylint: disable=cell-var-from-loop + lambda: model(input_value), # pylint: disable=cell-var-from-loop global_step=root.optimizer_step) root.save(file_prefix=checkpoint_prefix) self.assertEqual((training_continuation + 1) * num_training_steps, @@ -368,14 +350,14 @@ class CheckpointingTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): with ops.Graph().as_default(): - network = MyNetwork() + model = MyModel() optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network, + optimizer=optimizer, model=model, global_step=training_util.get_or_create_global_step()) input_value = constant_op.constant([[3.]]) train_op = optimizer.minimize( - network(input_value), + model(input_value), global_step=root.global_step) checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) with self.test_session(graph=ops.get_default_graph()) as session: @@ -405,17 +387,17 @@ class CheckpointingTests(test.TestCase): for training_continuation in range(3): with ops.Graph().as_default(), self.test_session( graph=ops.get_default_graph()): - network = MyNetwork() + model = MyModel() optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network, + optimizer=optimizer, model=model, global_step=training_util.get_or_create_global_step()) checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) status = root.restore(save_path=checkpoint_path) input_value = constant_op.constant([[3.]]) train_fn = functools.partial( optimizer.minimize, - functools.partial(network, input_value), + functools.partial(model, input_value), global_step=root.global_step) if context.in_graph_mode(): train_fn = functools.partial(self.evaluate, train_fn()) @@ -877,41 +859,41 @@ class CheckpointCompatibilityTests(test.TestCase): def 
_initialized_model(self): input_value = constant_op.constant([[3.]]) - network = MyNetwork() + model = MyModel() optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_checkpointable = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network, optimizer_step=optimizer_step) + optimizer=optimizer, model=model, optimizer_step=optimizer_step) train_op = optimizer.minimize( - functools.partial(network, input_value), + functools.partial(model, input_value), global_step=optimizer_step) self.evaluate(checkpointable_utils.gather_initializers( root_checkpointable)) self.evaluate(train_op) # A regular variable, a slot variable, and a non-slot Optimizer variable # with known values to check when loading. - self.evaluate(network._named_dense.bias.assign([1.])) + self.evaluate(model._named_dense.bias.assign([1.])) self.evaluate(optimizer.get_slot( - var=network._named_dense.bias, name="m").assign([2.])) + var=model._named_dense.bias, name="m").assign([2.])) beta1_power, _ = optimizer._get_beta_accumulators() self.evaluate(beta1_power.assign(3.)) return root_checkpointable def _set_sentinels(self, root_checkpointable): - self.evaluate(root_checkpointable.network._named_dense.bias.assign([101.])) + self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.])) self.evaluate( root_checkpointable.optimizer.get_slot( - var=root_checkpointable.network._named_dense.bias, name="m") + var=root_checkpointable.model._named_dense.bias, name="m") .assign([102.])) beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators() self.evaluate(beta1_power.assign(103.)) def _check_sentinels(self, root_checkpointable): self.assertAllEqual( - [1.], self.evaluate(root_checkpointable.network._named_dense.bias)) + [1.], self.evaluate(root_checkpointable.model._named_dense.bias)) self.assertAllEqual([2.], self.evaluate( root_checkpointable.optimizer.get_slot( - var=root_checkpointable.network._named_dense.bias, name="m"))) 
+ var=root_checkpointable.model._named_dense.bias, name="m"))) beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators() self.assertAllEqual(3., self.evaluate(beta1_power)) diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index 453cc8f8b7..e47bba9267 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -38,6 +38,7 @@ from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary from tensorflow.python.layers import base as tf_base_layers from tensorflow.python.layers import utils as tf_layers_util from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect @@ -302,6 +303,13 @@ class Network(base_layer.Layer): if not is_graph_network: if value not in self._layers: self._layers.append(value) + if isinstance(value, checkpointable.CheckpointableBase): + # Layer (and therefore Network/Model) inherit from CheckpointableBase + # rather than Checkpointable, which means there is no Checkpointable + # __setattr__ override (it would be a performance issue for functional + # layers). Therefore Model tracks Checkpointable objects itself. 
+ self._track_checkpointable( + checkpointable=value, name=name, overwrite=True) super(Network, self).__setattr__(name, value) def add_variable(self, name, shape, dtype=None, initializer=None, diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 2d040e7c0f..81ab77094e 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -879,6 +879,15 @@ class Model(Network): else: self._symbolic_set_inputs(inputs, training=training) + def _set_scope(self, scope=None): + """Modify the Layer scope creation logic to create ResourceVariables.""" + super(Model, self)._set_scope(scope=scope) + # Subclassed Models create ResourceVariables by default. This makes it + # easier to use Models in an eager/graph agnostic way (since eager execution + # always uses ResourceVariables). + if not self._is_graph_network: + self._scope.set_use_resource(True) + def _eager_set_inputs(self, inputs): """Set model's input and output specs based on the input data received. -- GitLab From 45daab910a3c730380594317749d911db5e933e6 Mon Sep 17 00:00:00 2001 From: Xiaoqiang Zheng Date: Thu, 1 Mar 2018 15:41:11 -0800 Subject: [PATCH 228/311] A fp16 implemention for ReluGrad. On V100 with Cuda 9, it reduces the average ReluGrad kernel time in Resnet50 from 249.44 us to 175.60 us, a 42% speedup. On Titan-X Pascal with Cuda 9, it reduces the average ReluGrad kernel time in Resnet50 from 747.98 us to 509.37 us, a 46.8% improvement. 
PiperOrigin-RevId: 187545504 --- tensorflow/core/kernels/relu_op_gpu.cu.cc | 93 ++++++++++++++++++- tensorflow/core/util/cuda_kernel_helper.h | 5 + .../python/kernel_tests/relu_op_test.py | 31 +++++++ 3 files changed, 127 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/relu_op_gpu.cu.cc b/tensorflow/core/kernels/relu_op_gpu.cu.cc index ec09d8dfea..6e46c979f3 100644 --- a/tensorflow/core/kernels/relu_op_gpu.cu.cc +++ b/tensorflow/core/kernels/relu_op_gpu.cu.cc @@ -19,15 +19,104 @@ limitations under the License. #include -#include "tensorflow/core/kernels/relu_op_functor.h" - +#include "third_party/eigen3/Eigen/Core" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/relu_op_functor.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" +#include "tensorflow/core/util/cuda_launch_config.h" namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; +namespace functor { +#ifdef TF_HAS_CUDA_FP16 + +// This kernel computes ReluGrad by processing one half2, two fp16, at a time. +// It effectively does: backdrops = (feature > 0) ? gradient : 0 +// It also tries to use native half2 primitives as much as possible. +__global__ void ReluGradHalfKernel(const Eigen::half* gradient, + const Eigen::half* feature, + Eigen::half* backprop, int32 count) { + int32 half2_count = count >> 1; + int32 index = blockIdx.x * blockDim.x + threadIdx.x; + const int32 total_device_threads = gridDim.x * blockDim.x; + + while (index < half2_count) { + // The fast branch. + // One half2, two fp16, is fetched and processed at a time. + half2 gradient_h2 = reinterpret_cast(gradient)[index]; + half2 feature_h2 = reinterpret_cast(feature)[index]; + half2* p_backprop_h2 = reinterpret_cast(backprop) + index; + +#if __CUDA_ARCH__ >= 530 + // Fast path, when half2 primitives are available. 
+ const half2 kZeroH2 = __float2half2_rn(0.f); + // mask = (feature > 0) + half2 mask_h2 = __hgt2(feature_h2, kZeroH2); + // backprop = mask * gradient + half2 backprop_h2 = __hmul2(mask_h2, gradient_h2); +#else + // Fall back: convert half2 to float2 for processing. + float2 feature_f2 = __half22float2(feature_h2); + float2 gradient_f2 = __half22float2(gradient_h2); + float2 backprop_f2 = make_float2((feature_f2.x > 0) ? gradient_f2.x : 0, + (feature_f2.y > 0) ? gradient_f2.y : 0); + // Convert back to half2. + half2 backprop_h2 = __float22half2_rn(backprop_f2); +#endif + + // Write back the result. + *p_backprop_h2 = backprop_h2; + + index += total_device_threads; + } + + if ((count & 0x1) == 1 && index == half2_count) { + // If the total number of the elements is odd, process the last element. + Eigen::half grad_h = gradient[count - 1]; + Eigen::half feature_h = feature[count - 1]; + + float grad_f = static_cast(grad_h); + float feature_f = static_cast(feature_h); + float backprop_f = (feature_f > 0) ? grad_f : 0; + + Eigen::half backprop_h(backprop_f); + backprop[count - 1] = backprop_h; + } +} + +template +struct ReluGrad { + // Computes ReluGrad backprop. + // + // gradient: gradient backpropagated to the Relu op. + // feature: either the inputs that were passed to the Relu, or its outputs + // (using either one yields the same result here). + // backprop: gradient to backpropagate to the Relu inputs. + void operator()(const Device& d, + typename TTypes::ConstTensor gradient, + typename TTypes::ConstTensor feature, + typename TTypes::Tensor backprop) { + // NOTE: When the activation is exactly zero, we do not propagate the + // associated gradient value. This allows the output of the Relu to be used, + // as well as its input. 
+ int32 count = gradient.size(); + if (count == 0) return; + int32 half2_count = Eigen::divup(count, 2); + const int32 kThreadInBlock = 512; + CudaLaunchConfig config = GetCudaLaunchConfigFixedBlockSize( + half2_count, d, ReluGradHalfKernel, 0, kThreadInBlock); + ReluGradHalfKernel<<>>(gradient.data(), feature.data(), + backprop.data(), count); + } +}; + +#endif // TF_HAS_CUDA_FP16 +} // namespace functor + // Definition of the GPU implementations declared in relu_op.cc. #define DEFINE_GPU_KERNELS(T) \ template struct functor::Relu; \ diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h index 18a4c008f1..01a5b6828a 100644 --- a/tensorflow/core/util/cuda_kernel_helper.h +++ b/tensorflow/core/util/cuda_kernel_helper.h @@ -21,6 +21,11 @@ limitations under the License. #include "tensorflow/core/util/cuda_device_functions.h" #include "tensorflow/core/util/cuda_launch_config.h" +#if CUDA_VERSION >= 7050 +#include "cuda/include/cuda_fp16.h" +#define TF_HAS_CUDA_FP16 +#endif + // Deprecated, use 'for(int i : CudaGridRangeX(n))' instead. 
#define CUDA_1D_KERNEL_LOOP(i, n) \ for (int i : ::tensorflow::CudaGridRangeX(n)) diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 6b4091ae5d..25e947f09e 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -19,12 +19,14 @@ from __future__ import division from __future__ import print_function import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables @@ -87,6 +89,35 @@ class ReluTest(test.TestCase): print("relu (float32) gradient err = ", err) self.assertLess(err, 1e-4) + # The gradient for fp16 is inaccurate due to the low-precision. + # Instead of relying on compute_gradient_error, we compare the fp16 analytical + # gradient against their fp32 counterpart. + def testGradientFloat16(self): + with self.test_session(use_gpu=True) as sess: + # Randomly construct a 1D shape from [1, 40) + shape = random_ops.random_uniform( + [1], minval=1, maxval=40, dtype=dtypes.int32) + + # Construct the fp32 graph and its gradient. + x = random_ops.random_uniform(shape, minval=-1, maxval=1, name="x") + y1 = nn_ops.relu(x, name="relu_fp32") + l1 = nn_ops.l2_loss(y1) + dx_f32 = gradients_impl.gradients(l1, x) + + # Construct the fp16 graph and its gradient. + # It starts with the same x, in fp32. But before it reaches Relu, it is + # cast into fp16. So during backprop, the gradient computation is in fp16. 
+ x2 = math_ops.cast(x, dtype=dtypes.float16, name="cast") + y2 = nn_ops.relu(x2, name="relu_fp16") + l2 = nn_ops.l2_loss(y2) + dx_f16 = gradients_impl.gradients(l2, x) + + # Repeat the experiment for 100 times. All tensor shapes and its tensor + # values are randomly generated for each run. + for _ in xrange(100): + dx_f32_v, dx_f16_v = sess.run([dx_f32, dx_f16]) + self.assertAllClose(dx_f32_v, dx_f16_v, atol=3e-4) + def testGradientFloat64(self): with self.test_session(): x = constant_op.constant( -- GitLab From 80ebc380ec8dacdf900cc66c6590054e26b6dade Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 1 Mar 2018 15:47:28 -0800 Subject: [PATCH 229/311] Fix batch_norm_benchmark. PiperOrigin-RevId: 187546384 --- tensorflow/python/ops/batch_norm_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/batch_norm_benchmark.py b/tensorflow/python/ops/batch_norm_benchmark.py index 4f65e3771c..5d68b47aea 100644 --- a/tensorflow/python/ops/batch_norm_benchmark.py +++ b/tensorflow/python/ops/batch_norm_benchmark.py @@ -41,7 +41,7 @@ def batch_norm_op(tensor, mean, variance, beta, gamma, scale): # _batch_norm_with_global_normalization is deprecated in v9 ops.get_default_graph().graph_def_versions.producer = 8 # pylint: disable=protected-access - return gen_nn_ops.batch_norm_with_global_normalization( + return gen_nn_ops._batch_norm_with_global_normalization( tensor, mean, variance, beta, gamma, 0.001, scale) # pylint: enable=protected-access -- GitLab From 6db78cd5266dc761c4f90a80d7555c6c33fc453a Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 1 Mar 2018 16:00:17 -0800 Subject: [PATCH 230/311] [ClusterFLR] Prolong the lifetime of the RunGraphRequest until the call has completed. Some WorkerService implementations rely on the request object remaining live until the callback is called. 
PiperOrigin-RevId: 187548140 --- .../cluster_function_library_runtime.cc | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc index 3a8d591236..0c5c4d59ed 100644 --- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc +++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc @@ -175,32 +175,33 @@ void ClusterFunctionLibraryRuntime::Run( return; } - RunGraphRequest req; - req.set_session_handle(worker_session_->session_name); - req.set_graph_handle(function_data->graph_handle); + RunGraphRequest* req = new RunGraphRequest; + req->set_session_handle(worker_session_->session_name); + req->set_graph_handle(function_data->graph_handle); // Borrowed from master_session.cc const uint64 step_id = (random::New64() & ((1uLL << 56) - 1)) | (1uLL << 56); - req.set_step_id(step_id); + req->set_step_id(step_id); int i = 0; for (const auto& send_key : function_data->send_keys) { - NamedTensorProto* send = req.add_send(); + NamedTensorProto* send = req->add_send(); send->set_name(send_key); args[i].AsProtoTensorContent(send->mutable_tensor()); i++; } const std::vector& recv_keys = function_data->recv_keys; for (const auto& recv_key : recv_keys) { - req.add_recv_key(recv_key); + req->add_recv_key(recv_key); } RunGraphResponse* resp = new RunGraphResponse(); CallOptions* call_options = new CallOptions(); wi->RunGraphAsync( - call_options, &req, resp, - [call_options, resp, rets, recv_keys, done](const Status& status) { + call_options, req, resp, + [call_options, req, resp, rets, recv_keys, done](const Status& status) { if (!status.ok()) { done(status); delete call_options; + delete req; delete resp; return; } @@ -212,25 +213,28 @@ void ClusterFunctionLibraryRuntime::Run( for (const auto& recv_key : recv_keys) { TensorProto* tp = mapped_recvs[recv_key]; if (tp == 
nullptr) { + done(errors::Internal("Could not find key: ", recv_key)); delete call_options; + delete req; delete resp; - done(errors::Internal("Could not find key: ", recv_key)); return; } Tensor t; if (t.FromProto(*tp)) { rets->push_back(t); } else { - delete call_options; - delete resp; done(errors::Internal("Could not convert tensor proto: ", tp->DebugString())); + delete call_options; + delete req; + delete resp; return; } } + done(status); delete call_options; + delete req; delete resp; - done(status); }); } -- GitLab From c4a50c5897170edf3055afcce25c981ee331de07 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 16:06:22 -0800 Subject: [PATCH 231/311] Do not crash if we failed to get the field name. PiperOrigin-RevId: 187549153 --- tensorflow/contrib/lite/java/proguard.flags | 3 +++ .../lite/java/src/main/native/nativeinterpreterwrapper_jni.cc | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 tensorflow/contrib/lite/java/proguard.flags diff --git a/tensorflow/contrib/lite/java/proguard.flags b/tensorflow/contrib/lite/java/proguard.flags new file mode 100644 index 0000000000..8ee3d7e7ae --- /dev/null +++ b/tensorflow/contrib/lite/java/proguard.flags @@ -0,0 +1,3 @@ +-keepclassmembers class org.tensorflow.lite.NativeInterpreterWrapper { + private long inferenceDurationNanoseconds; +} \ No newline at end of file diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index 47bf4c9c9d..475b467fac 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -447,7 +447,9 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( jclass wrapper_clazz = env->GetObjectClass(wrapper); jfieldID fid = env->GetFieldID(wrapper_clazz, "inferenceDurationNanoseconds", "J"); - if (fid != 0) { + if 
(env->ExceptionCheck()) { + env->ExceptionClear(); + } else if (fid != nullptr) { env->SetLongField( wrapper, fid, ::tflite::timespec_diff_nanoseconds(&beforeInference, &afterInference)); -- GitLab From 980028f59f96c7e60688fef9106df2d043e02629 Mon Sep 17 00:00:00 2001 From: Karmel Allison Date: Thu, 1 Mar 2018 16:33:26 -0800 Subject: [PATCH 232/311] Adds a TensorServingInputReceiver that allows export_savedmodel to pass raw tensors to model functions. Addresses #11674. PiperOrigin-RevId: 187552824 --- tensorflow/python/estimator/estimator.py | 2 +- tensorflow/python/estimator/estimator_test.py | 55 ++++++++++++++++ tensorflow/python/estimator/export/export.py | 56 +++++++++++++++++ .../python/estimator/export/export_lib.py | 2 + .../python/estimator/export/export_test.py | 62 +++++++++++++++++++ ...xport.-tensor-serving-input-receiver.pbtxt | 27 ++++++++ .../golden/tensorflow.estimator.export.pbtxt | 4 ++ 7 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 1167b3834e..1a2b33721a 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -570,7 +570,7 @@ class Estimator(object): export_dir_base: A string containing a directory in which to create timestamped subdirectories containing exported SavedModels. serving_input_receiver_fn: A function that takes no argument and - returns a `ServingInputReceiver`. + returns a `ServingInputReceiver` or `TensorServingInputReceiver`. assets_extra: A dict specifying how to populate the assets.extra directory within the exported SavedModel, or `None` if no extra assets are needed. as_text: whether to write the SavedModel proto in text format. 
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 7a0745b1d0..ac0ff41dd2 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -48,6 +48,7 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import lookup_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import state_ops @@ -1936,6 +1937,60 @@ class EstimatorExportTest(test.TestCase): # cleanup gfile.DeleteRecursively(tmpdir) + def test_export_savedmodel_tensor_features(self): + """Test that models accepting a single raw Tensor can be exported. + + See https://github.com/tensorflow/tensorflow/issues/11674 + + If the model_fn and receiver_fn accept raw tensors rather than dictionaries + as input, export_savedmodel should be okay with that, too. + + """ + + tmpdir = tempfile.mkdtemp() + + def _input_fn_tensor_features(): + t = array_ops.constant([1, 2, 3], dtype=dtypes.float32, shape=[1, 3]) + return (t, None) + + def _model_fn_tensor_features(features, labels, mode): + _ = labels + prediction = math_ops.matmul(features, features, transpose_b=True) + + return model_fn_lib.EstimatorSpec( + mode, + predictions=prediction, + loss=constant_op.constant(1.), + train_op=state_ops.assign_add(training.get_global_step(), 1), + export_outputs={ + 'test': export_output.PredictOutput({'prediction': prediction}) + }) + + def _serving_input_receiver_fn(): + feat = array_ops.placeholder(dtype=dtypes.float32) + return export.TensorServingInputReceiver( + features=feat, receiver_tensors=feat) + + est = estimator.Estimator(model_fn=_model_fn_tensor_features) + est.train(input_fn=_input_fn_tensor_features, steps=1) + + # Perform the export. 
+ export_dir_base = os.path.join( + compat.as_bytes(tmpdir), compat.as_bytes('export')) + export_dir = est.export_savedmodel( + export_dir_base, _serving_input_receiver_fn) + + # Restore, to validate that the export was well-formed. + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.SERVING], export_dir) + graph_ops = [x.name.lower() for x in graph.get_operations()] + self.assertTrue('const' in graph_ops) + self.assertTrue('matmul' in graph_ops) + + # Clean up. + gfile.DeleteRecursively(tmpdir) + def test_scaffold_is_used_for_saver(self): tmpdir = tempfile.mkdtemp() diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py index 83251c79fc..f240e11478 100644 --- a/tensorflow/python/estimator/export/export.py +++ b/tensorflow/python/estimator/export/export.py @@ -120,6 +120,62 @@ class ServingInputReceiver(collections.namedtuple( receiver_tensors_alternatives=receiver_tensors_alternatives) +@tf_export('estimator.export.TensorServingInputReceiver') +class TensorServingInputReceiver(collections.namedtuple( + 'TensorServingInputReceiver', + ['features', 'receiver_tensors', 'receiver_tensors_alternatives'])): + """A return type for a serving_input_receiver_fn. + + This is for use with models that expect a single `Tensor` or `SparseTensor` + as an input feature, as opposed to a dict of features. + + The normal `ServingInputReceiver` always returns a feature dict, even if it + contains only one entry, and so can be used only with models that accept such + a dict. For models that accept only a single raw feature, the + `serving_input_receiver_fn` provided to `Estimator.export_savedmodel()` should + return this `TensorServingInputReceiver` instead. 
See: + https://github.com/tensorflow/tensorflow/issues/11674 + + Note that the receiver_tensors and receiver_tensor_alternatives arguments + will be automatically converted to the dict representation in either case, + because the SavedModel format requires each input `Tensor` to have a name + (provided by the dict key). + + The expected return values are: + features: A single `Tensor` or `SparseTensor`, representing the feature + to be passed to the model. + receiver_tensors: a `Tensor`, or a dict of string to `Tensor`, specifying + input nodes where this receiver expects to be fed by default. Typically, + this is a single placeholder expecting serialized `tf.Example` protos. + receiver_tensors_alternatives: a dict of string to additional + groups of receiver tensors, each of which may be a `Tensor` or a dict of + string to `Tensor`. These named receiver tensor alternatives generate + additional serving signatures, which may be used to feed inputs at + different points within the input receiver subgraph. A typical usage is + to allow feeding raw feature `Tensor`s *downstream* of the + tf.parse_example() op. Defaults to None. 
+ """ + + def __new__(cls, features, receiver_tensors, + receiver_tensors_alternatives=None): + if features is None: + raise ValueError('features must be defined.') + if not (isinstance(features, ops.Tensor) + or isinstance(features, sparse_tensor.SparseTensor)): + raise ValueError('feature must be a Tensor or SparseTensor.') + + receiver = ServingInputReceiver( + features=features, + receiver_tensors=receiver_tensors, + receiver_tensors_alternatives=receiver_tensors_alternatives) + + return super(TensorServingInputReceiver, cls).__new__( + cls, + features=receiver.features[_SINGLE_FEATURE_DEFAULT_NAME], + receiver_tensors=receiver.receiver_tensors, + receiver_tensors_alternatives=receiver.receiver_tensors_alternatives) + + @tf_export('estimator.export.build_parsing_serving_input_receiver_fn') def build_parsing_serving_input_receiver_fn(feature_spec, default_batch_size=None): diff --git a/tensorflow/python/estimator/export/export_lib.py b/tensorflow/python/estimator/export/export_lib.py index 99cd81d678..226fc97fd3 100644 --- a/tensorflow/python/estimator/export/export_lib.py +++ b/tensorflow/python/estimator/export/export_lib.py @@ -22,6 +22,7 @@ from __future__ import print_function from tensorflow.python.estimator.export.export import build_parsing_serving_input_receiver_fn from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn from tensorflow.python.estimator.export.export import ServingInputReceiver +from tensorflow.python.estimator.export.export import TensorServingInputReceiver from tensorflow.python.estimator.export.export_output import ClassificationOutput from tensorflow.python.estimator.export.export_output import ExportOutput from tensorflow.python.estimator.export.export_output import PredictOutput @@ -34,6 +35,7 @@ _allowed_symbols = [ 'build_parsing_serving_input_receiver_fn', 'build_raw_serving_input_receiver_fn', 'ServingInputReceiver', + 'TensorServingInputReceiver', 'ClassificationOutput', 'ExportOutput', 
'PredictOutput', diff --git a/tensorflow/python/estimator/export/export_test.py b/tensorflow/python/estimator/export/export_test.py index 8442bf04ac..eb9688bc97 100644 --- a/tensorflow/python/estimator/export/export_test.py +++ b/tensorflow/python/estimator/export/export_test.py @@ -385,5 +385,67 @@ class ExportTest(test_util.TensorFlowTestCase): self.assertTrue(int(time_2) < int(time_3)) +class TensorServingReceiverTest(test_util.TensorFlowTestCase): + + def test_tensor_serving_input_receiver_constructor(self): + features = constant_op.constant([0]) + receiver_tensors = { + "example0": array_ops.placeholder(dtypes.string, name="example0"), + u"example1": array_ops.placeholder(dtypes.string, name="example1"), + } + r = export.TensorServingInputReceiver(features, receiver_tensors) + self.assertTrue(isinstance(r.features, ops.Tensor)) + self.assertTrue(isinstance(r.receiver_tensors, dict)) + + def test_tensor_serving_input_receiver_sparse(self): + features = sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[1], dense_shape=[1, 1]) + receiver_tensors = { + "example0": array_ops.placeholder(dtypes.string, name="example0"), + u"example1": array_ops.placeholder(dtypes.string, name="example1"), + } + r = export.TensorServingInputReceiver(features, receiver_tensors) + self.assertTrue(isinstance(r.features, sparse_tensor.SparseTensor)) + self.assertTrue(isinstance(r.receiver_tensors, dict)) + + def test_serving_input_receiver_features_invalid(self): + receiver_tensors = { + "example0": array_ops.placeholder(dtypes.string, name="example0"), + u"example1": array_ops.placeholder(dtypes.string, name="example1"), + } + + with self.assertRaisesRegexp(ValueError, "features must be defined"): + export.TensorServingInputReceiver( + features=None, + receiver_tensors=receiver_tensors) + + with self.assertRaisesRegexp(ValueError, "feature must be a Tensor"): + export.TensorServingInputReceiver( + features={"1": constant_op.constant([1])}, + receiver_tensors=receiver_tensors) + + 
def test_serving_input_receiver_receiver_tensors_invalid(self): + features = constant_op.constant([0]) + + with self.assertRaisesRegexp( + ValueError, "receiver_tensors must be defined"): + export.TensorServingInputReceiver( + features=features, + receiver_tensors=None) + + with self.assertRaisesRegexp( + ValueError, "receiver_tensors keys must be strings"): + export.TensorServingInputReceiver( + features=features, + receiver_tensors={ + 1: array_ops.placeholder(dtypes.string, name="example0")}) + + with self.assertRaisesRegexp( + ValueError, "receiver_tensor example1 must be a Tensor"): + export.TensorServingInputReceiver( + features=features, + receiver_tensors={"example1": [1]}) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt new file mode 100644 index 0000000000..4fe92643bf --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt @@ -0,0 +1,27 @@ +path: "tensorflow.estimator.export.TensorServingInputReceiver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "features" + mtype: "" + } + member { + name: "receiver_tensors" + mtype: "" + } + member { + name: "receiver_tensors_alternatives" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.export.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.export.pbtxt index 4d0dddb3bc..bd72f6cd79 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.export.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.export.pbtxt @@ -20,6 +20,10 @@ tf_module { name: "ServingInputReceiver" mtype: "" } + member { + name: "TensorServingInputReceiver" + mtype: "" + } member_method { 
name: "build_parsing_serving_input_receiver_fn" argspec: "args=[\'feature_spec\', \'default_batch_size\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From 72b3a5cd8d787bcdab40a94de4788e7e555c76da Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 1 Mar 2018 16:52:07 -0800 Subject: [PATCH 233/311] Expose Checkpointable symbols in tf.contrib.eager/tfe - tfe.Checkpoint Utility for grouping Checkpointable objects into training checkpoints, has save/restore methods which call CheckpointableSaver. - tfe.Checkpointable For user-defined Checkpointable objects. - tfe.CheckpointableSaver More control over saving/restoring than tfe.Checkpoint. Only tfe.Checkpoint is required to switch examples over, so I can leave the others out if there are objections. PiperOrigin-RevId: 187555472 --- tensorflow/contrib/cmake/python_modules.txt | 1 + tensorflow/contrib/cmake/python_protos.txt | 1 + tensorflow/contrib/eager/python/BUILD | 1 + tensorflow/contrib/eager/python/tfe.py | 7 +++++++ 4 files changed, 10 insertions(+) diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index bfe53c01b3..0d2a6a23db 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -165,6 +165,7 @@ tensorflow/contrib/distributions/python tensorflow/contrib/distributions/python/ops tensorflow/contrib/distributions/python/ops/bijectors tensorflow/contrib/eager +tensorflow/contrib/eager/proto tensorflow/contrib/eager/python tensorflow/contrib/estimator tensorflow/contrib/estimator/python diff --git a/tensorflow/contrib/cmake/python_protos.txt b/tensorflow/contrib/cmake/python_protos.txt index 8a9c406d8b..c03c0c80fe 100644 --- a/tensorflow/contrib/cmake/python_protos.txt +++ b/tensorflow/contrib/cmake/python_protos.txt @@ -4,6 +4,7 @@ tensorflow/python tensorflow/contrib/boosted_trees/proto tensorflow/contrib/cloud/kernels tensorflow/contrib/decision_trees/proto 
+tensorflow/contrib/eager/proto tensorflow/contrib/gdr tensorflow/contrib/lite/toco tensorflow/contrib/mpi diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 8c4b0827fd..e8c514c114 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -11,6 +11,7 @@ py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ + ":checkpointable_utils", ":datasets", ":metrics", ":network", diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py index d32bebf90c..fce7a60853 100644 --- a/tensorflow/contrib/eager/python/tfe.py +++ b/tensorflow/contrib/eager/python/tfe.py @@ -56,6 +56,10 @@ To use, at program startup, call `tfe.enable_eager_execution()`. @@save_network_checkpoint @@restore_network_checkpoint +@@Checkpoint +@@Checkpointable +@@CheckpointableSaver + @@in_eager_mode @@in_graph_mode @@ -74,6 +78,8 @@ from __future__ import print_function # pylint:disable=g-bad-import-order,g-import-not-at-top,unused-import # from tensorflow.contrib.eager.python import metrics +from tensorflow.contrib.eager.python.checkpointable_utils import CheckpointableSaver +from tensorflow.contrib.eager.python.checkpointable_utils import Checkpoint from tensorflow.contrib.eager.python.datasets import Iterator from tensorflow.contrib.eager.python.network import Network from tensorflow.contrib.eager.python.network import Sequential @@ -105,6 +111,7 @@ from tensorflow.python.ops.resource_variable_ops import ResourceVariable as Vari from tensorflow.python.ops.variable_scope import EagerVariableStore from tensorflow.python.ops import script_ops from tensorflow.python.ops import template +from tensorflow.python.training.checkpointable import Checkpointable from tensorflow.python.util.all_util import remove_undocumented py_func = script_ops.eager_py_func -- GitLab From 700c406bc5c9182b91cf32873e8ae0d81e084114 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: 
Thu, 1 Mar 2018 17:00:46 -0800 Subject: [PATCH 234/311] Include the response upon any error. PiperOrigin-RevId: 187556563 --- .../core/platform/cloud/curl_http_request.cc | 56 ++++++++++++------- .../platform/cloud/curl_http_request_test.cc | 7 ++- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index 4b5f6974c1..80ad1cf0b8 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -399,6 +399,24 @@ size_t CurlHttpRequest::HeaderCallback(const void* ptr, size_t size, return size * nmemb; } +// This is pulled out as a separate function so that it's only computed when +// an error occurs. +string response_to_error_message(uint64 response_code, StringPiece response, + size_t response_to_error_limit, + CURLcode curl_result, + StringPiece error_buffer) { + string error_message = strings::StrCat( + "Error executing an HTTP request (HTTP response code ", response_code, + ", error code ", curl_result, ", error message '", error_buffer, "')"); + if (!response.empty()) { + return strings::StrCat( + error_message, ", response '", + response.substr(0, std::min(response.size(), response_to_error_limit)), + "'"); + } + return error_message; +} + Status CurlHttpRequest::Send() { CheckNotSent(); CHECK(is_uri_set_) << "URI has not been set."; @@ -430,13 +448,7 @@ Status CurlHttpRequest::Send() { libcurl_->curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, &response_code_); - const auto& error_message = strings::StrCat( - "Error executing an HTTP request (HTTP response code ", response_code_, - ", error code ", curl_result, ", error message '", error_buffer, "')"); - Status result; - StringPiece response = GetResponse(); - string extended_error_message; switch (response_code_) { // The group of response codes indicating that the request achieved // the expected goal. 
@@ -447,7 +459,9 @@ Status CurlHttpRequest::Send() { if (curl_result != CURLE_OK) { // This means the server executed the request successfully, but then // something went wrong during the transmission of the response. - result = errors::Unavailable(error_message); + result = errors::Unavailable(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, + curl_result, error_buffer)); } else { result = Status::OK(); } @@ -463,27 +477,25 @@ Status CurlHttpRequest::Send() { // INVALID_ARGUMENT indicates a problem with how the request is constructed. case 400: // Bad Request case 411: // Length Required - result = errors::InvalidArgument(error_message); + result = errors::InvalidArgument(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, curl_result, + error_buffer)); break; // PERMISSION_DENIED indicates an authentication or an authorization issue. case 401: // Unauthorized case 403: // Forbidden - if (!response.empty()) { - extended_error_message = strings::StrCat( - error_message, ", response ", - response.substr( - 0, std::min(response.size(), response_to_error_limit_))); - result = errors::PermissionDenied(extended_error_message); - } else { - result = errors::PermissionDenied(error_message); - } + result = errors::PermissionDenied(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, curl_result, + error_buffer)); break; // NOT_FOUND indicates that the requested resource does not exist. 
case 404: // Not found case 410: // Gone - result = errors::NotFound(error_message); + result = errors::NotFound(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, curl_result, + error_buffer)); break; // FAILED_PRECONDITION indicates that the request failed because some @@ -495,7 +507,9 @@ Status CurlHttpRequest::Send() { case 307: // Temporary Redirect case 412: // Precondition Failed case 413: // Payload Too Large - result = errors::FailedPrecondition(error_message); + result = errors::FailedPrecondition(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, curl_result, + error_buffer)); break; // UNAVAILABLE indicates a problem that can go away if the request @@ -511,7 +525,9 @@ Status CurlHttpRequest::Send() { case 502: // Bad Gateway case 503: // Service Unavailable default: // All other HTTP response codes also should be retried. - result = errors::Unavailable(error_message); + result = errors::Unavailable(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, curl_result, + error_buffer)); break; } if (!result.ok()) { diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc index 86d26a0287..94af121768 100644 --- a/tensorflow/core/platform/cloud/curl_http_request_test.cc +++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc @@ -378,7 +378,7 @@ TEST(CurlHttpRequestTest, GetRequest_503) { EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( "Error executing an HTTP request (HTTP response code 503, " - "error code 23, error message '')", + "error code 23, error message ''), response 'get response'", status.error_message()); EXPECT_EQ(503, http_request.GetResponseCode()); } @@ -397,7 +397,8 @@ TEST(CurlHttpRequestTest, GetRequest_HttpCode0) { EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( "Error executing an HTTP request (HTTP response code 0, " - "error code 28, error 
message 'Operation timed out')", + "error code 28, error message 'Operation timed out'), " + "response 'get response'", status.error_message()); EXPECT_EQ(0, http_request.GetResponseCode()); } @@ -629,7 +630,7 @@ TEST(CurlHttpRequestTest, ProgressIsStuck) { EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( "Error executing an HTTP request (HTTP response code 200, " - "error code 42, error message '')", + "error code 42, error message ''), response 'test'", status.error_message()); } -- GitLab From 64bd36057449dd01d6944b8d31a53b1301923f2c Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Thu, 1 Mar 2018 17:07:20 -0800 Subject: [PATCH 235/311] Improve the error message when failing to write events. The current error message looks like: "Failed to sync 10 to " PiperOrigin-RevId: 187557623 --- tensorflow/core/util/events_writer.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/events_writer.cc b/tensorflow/core/util/events_writer.cc index 49507616ed..c50e329bda 100644 --- a/tensorflow/core/util/events_writer.cc +++ b/tensorflow/core/util/events_writer.cc @@ -122,9 +122,11 @@ Status EventsWriter::Flush() { CHECK(recordio_file_ != nullptr) << "Unexpected NULL file"; TF_RETURN_WITH_CONTEXT_IF_ERROR(recordio_writer_->Flush(), "Failed to flush ", - num_outstanding_events_, " to ", filename_); + num_outstanding_events_, " events to ", + filename_); TF_RETURN_WITH_CONTEXT_IF_ERROR(recordio_file_->Sync(), "Failed to sync ", - num_outstanding_events_, " to ", filename_); + num_outstanding_events_, " events to ", + filename_); // The FileStillExists() condition is necessary because // recordio_writer_->Sync() can return OK even if the underlying @@ -135,7 +137,8 @@ Status EventsWriter::Flush() { // disappearing file, in case for some file system File::Exists() is // false after File::Open() but before File::Sync(). 
TF_RETURN_WITH_CONTEXT_IF_ERROR(FileStillExists(), "Failed to flush ", - num_outstanding_events_, " to ", filename_); + num_outstanding_events_, " events to ", + filename_); VLOG(1) << "Wrote " << num_outstanding_events_ << " events to disk."; num_outstanding_events_ = 0; return Status::OK(); -- GitLab From 16f1eea1cdfdb7facdac8ac2ccab3ee80af41409 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 1 Mar 2018 17:20:54 -0800 Subject: [PATCH 236/311] Scaffolding for int8 calibration in TF-TRT (#17309) * Scaffolding for int8 calibration * Add ops/trt_calib_op.cc * Rename files and replace std::string with string * Line lengths, variable names, conditionals in BUILD * mode variable renaming * More fixes for review * Run clang-format * Fix the build failue and replace the macro with a function * Add TODO(aaroey) for future PRs * Fix namespace for internal build * Fix mismatched argument name and unused includes to make internal build happy * Fix order of dependencies in BUILD file * Remove dangling #undef --- tensorflow/contrib/tensorrt/BUILD | 44 +++++- .../contrib/tensorrt/kernels/trt_calib_op.cc | 129 ++++++++++++++++++ .../contrib/tensorrt/kernels/trt_calib_op.h | 52 +++++++ .../contrib/tensorrt/ops/trt_calib_op.cc | 37 +++++ .../tensorrt/resources/trt_int8_calibrator.cc | 119 ++++++++++++++++ .../tensorrt/resources/trt_int8_calibrator.h | 65 +++++++++ .../resources/trt_resource_manager.cc | 39 ++++++ .../tensorrt/resources/trt_resource_manager.h | 49 +++++++ .../tensorrt/resources/trt_resources.h | 95 +++++++++++++ 9 files changed, 625 insertions(+), 4 deletions(-) create mode 100644 tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc create mode 100644 tensorflow/contrib/tensorrt/kernels/trt_calib_op.h create mode 100644 tensorflow/contrib/tensorrt/ops/trt_calib_op.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h create mode 100644 
tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_resource_manager.h create mode 100644 tensorflow/contrib/tensorrt/resources/trt_resources.h diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 65a0e903a7..9909fcaca2 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -47,7 +47,10 @@ tf_cuda_cc_test( tf_custom_op_library( name = "python/ops/_trt_engine_op.so", - srcs = ["ops/trt_engine_op.cc"], + srcs = [ + "ops/trt_calib_op.cc", + "ops/trt_engine_op.cc", + ], deps = [ ":trt_engine_op_kernel", ":trt_shape_function", @@ -71,11 +74,18 @@ tf_cuda_library( cc_library( name = "trt_engine_op_kernel", - srcs = ["kernels/trt_engine_op.cc"], - hdrs = ["kernels/trt_engine_op.h"], + srcs = [ + "kernels/trt_calib_op.cc", + "kernels/trt_engine_op.cc", + ], + hdrs = [ + "kernels/trt_calib_op.h", + "kernels/trt_engine_op.h", + ], copts = tf_copts(), deps = [ ":trt_logging", + ":trt_resources", "//tensorflow/core:gpu_headers_lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:stream_executor_headers_lib", @@ -87,7 +97,10 @@ cc_library( ) tf_gen_op_libs( - op_lib_names = ["trt_engine_op"], + op_lib_names = [ + "trt_engine_op", + "trt_calib_op", + ], deps = if_tensorrt([ "@local_config_tensorrt//:nv_infer", ]), @@ -108,6 +121,7 @@ tf_cuda_library( tf_gen_op_wrapper_py( name = "trt_engine_op", deps = [ + ":trt_calib_op_op_lib", ":trt_engine_op_op_lib", ":trt_logging", ":trt_shape_function", @@ -171,6 +185,27 @@ tf_py_wrap_cc( ], ) +tf_cuda_library( + name = "trt_resources", + srcs = [ + "resources/trt_int8_calibrator.cc", + "resources/trt_resource_manager.cc", + ], + hdrs = [ + "resources/trt_int8_calibrator.h", + "resources/trt_resource_manager.h", + "resources/trt_resources.h", + ], + deps = [ + ":trt_logging", + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework_lite", + 
"//tensorflow/core:lib_proto_parsing", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]), +) + # Library for the node-level conversion portion of TensorRT operation creation tf_cuda_library( name = "trt_conversion", @@ -185,6 +220,7 @@ tf_cuda_library( deps = [ ":segment", ":trt_logging", + ":trt_resources", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core:framework", diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc new file mode 100644 index 0000000000..1dcb87e768 --- /dev/null +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -0,0 +1,129 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/kernels/trt_calib_op.h" +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda_runtime_api.h" +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +TRTCalibOp::TRTCalibOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("segment_nodes", &segment_nodes_)); + OP_REQUIRES_OK(context, context->GetAttr("input_names", &input_names_)); + OP_REQUIRES_OK(context, context->GetAttr("resource_name", &resource_name_)); +}; + +#define TYPECASE(dt, X, Y) \ + case dt: { \ + return (void*)X->flat::Type>().data(); \ + } + +void* GetTensorAddress(const Tensor* tensor_ptr) { + auto tensor_type = tensor_ptr->dtype(); + switch (tensor_type) { + TYPECASE(tensorflow::DT_FLOAT, tensor_ptr, dest_ptr); + TYPECASE(tensorflow::DT_HALF, tensor_ptr, dest_ptr); + TYPECASE(tensorflow::DT_INT8, tensor_ptr, dest_ptr); + default: { + LOG(FATAL) << "Unsupported Data type " + << tensorflow::DataTypeString(tensor_type); + return nullptr; + } + } +} + +void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { + // TODO(aaroey): make sure ctx->resource_mgr() is used in future PR. 
+ auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); + auto res_mgr = trt_rm->getManager("TRTCalibOps"); + tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr; + auto status = res_mgr->Lookup(resource_name_, resource_name_, &calib_res); + + if (!status.ok()) { + ctx->SetStatus(status); + return; + } + int num_inputs = ctx->num_inputs(); + // first run instantiate calibrator + if (calib_res->calibrator_ == nullptr) { + dev_tensors_.resize(num_inputs); + int batch_size = ctx->input(0).dim_size(0); + VLOG(1) << " Constructing calibrator"; + for (int i = 0; i < num_inputs; i++) { + // allocate workspace on device for inputs + const tensorflow::Tensor& t = ctx->input(i); + OP_REQUIRES_OK(ctx, + ctx->allocate_persistent(t.dtype(), t.shape(), + &dev_tensors_.at(i), nullptr)); + const auto device_tensor = dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(), device_tensor->TotalBytes()); + void* device_address = GetTensorAddress(device_tensor); + device_buffers_.emplace(input_names_.at(i), + std::pair( + device_address, device_tensor->TotalBytes())); + } + + calib_res->calibrator_ = + new TRTInt8Calibrator(device_buffers_, batch_size, resource_name_); + string label(resource_name_); + calib_res->thr_ = new std::thread([calib_res, label]() { + VLOG(1) << "Starting calibration thread, Calibration Resource @ " + << calib_res; + calib_res->builder_->setInt8Calibrator(calib_res->calibrator_); + calib_res->builder_->setInt8Mode(true); + calib_res->engine_ = calib_res->builder_->buildCudaEngine( + *calib_res->network_); // will loop until we terminate calibrator + VLOG(1) << "Calibration loop terminated " << label; + }); + VLOG(1) << "initialized calibrator resource"; + } // calibrator initialized + + // Pass input data to calibrator + std::unordered_map input_data; + for (int i = 0; i < num_inputs; i++) { + const Tensor& t = ctx->input(i); + void* data_address = GetTensorAddress(&t); + const auto device_tensor = 
dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(), + device_tensor->TotalBytes()); // use the tensor so FW keeps it + input_data.emplace(input_names_.at(i), data_address); + ctx->set_output(i, t); + } + VLOG(2) << "Filled map for sending"; + calib_res->calibrator_->setBatch(input_data); + VLOG(2) << "Passed calibration data"; + // TODO(aaroey): make sure we wait for the completion of calibration on the + // last batch in future PR. +}; + +#undef TYPECASE + +REGISTER_KERNEL_BUILDER(Name("TRTCalibOp").Device(DEVICE_GPU), TRTCalibOp); + +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h new file mode 100644 index 0000000000..23df9db32f --- /dev/null +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h @@ -0,0 +1,52 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H +#define TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H + +#include +#include +#include +#include +#include +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +namespace tensorflow { +namespace tensorrt { +// TODO(sami): Convert this to async kernel! +class TRTCalibOp : public OpKernel { + public: + explicit TRTCalibOp(OpKernelConstruction* context); + + void Compute(OpKernelContext* context) override; + + private: + string resource_name_; + std::vector segment_nodes_; + std::vector input_names_; + std::vector shapes_; + std::unordered_map> device_buffers_; + std::vector dev_tensors_; +}; +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif +#endif // TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H diff --git a/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc b/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc new file mode 100644 index 0000000000..4835e50650 --- /dev/null +++ b/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc @@ -0,0 +1,37 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +namespace tensorflow { + +REGISTER_OP("TRTCalibOp") + .Attr("segment_nodes: list(string)") // names of the ops in segment + .Attr("segment_output_names: list(string)") // names of the output ops in + // segment + .Attr("input_names: list(string)") // names of the inputs for + // passing into tensorrt + .Attr("resource_name: string") + .Attr("InT: list({int8, float16, float32})") + .Input("in_tensor: InT") + .Output("out_tensor: InT") + .SetShapeFn([](tensorflow::shape_inference::InferenceContext* c) { + for (int i = 0; i < c->num_inputs(); i++) { + c->set_output(i, c->input(i)); + } + return Status::OK(); + }); + +} // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc new file mode 100644 index 0000000000..3d5cc76c42 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -0,0 +1,119 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" + +#include +#include +#include + +#include "tensorflow/core/platform/logging.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda_runtime_api.h" + +namespace tensorflow { +namespace tensorrt { + +// set the batch size before constructing the thread to execute engine +int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } + +TRTInt8Calibrator::TRTInt8Calibrator( + const std::unordered_map>& dev_buffers, + int batch_size, string engine_name) + : batch_size_(batch_size), + done_(false), + dev_buffers_(dev_buffers), + calib_running_(false), + engine_name_(engine_name) {} + +bool TRTInt8Calibrator::setBatch( + const std::unordered_map& data) { + // TODO(aaroey): make sure that in future PR: + // 1. the mutex_lock is outside of the loop + // 2. wait() is used instead of wait_for() + // 3. done_ is to be protected by the mutex + // 4. the first batch is not missed + if (done_) return false; + while (calib_running_.load( + std::memory_order_acquire)) { // wait while calibration is running + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + if (done_) return false; + } + VLOG(1) << "Set Batch Waiting finished"; + for (const auto it : data) { + auto devptr = dev_buffers_.find(it.first); + if (devptr == dev_buffers_.end()) { + LOG(FATAL) << "FATAL " << engine_name_ << " input name '" << it.first + << "' does not match with the buffer names"; + } + const auto& d = devptr->second; + + // TODO(aaroey): we should not use sync copy on default stream. Make sure + // stream->ThenMemcpy() is used in future PRs. 
+ auto status = + cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice); + if (status != cudaSuccess) { + LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first + << "' failed with " << status; + } + } + calib_running_.store(true, std::memory_order_release); // release builder + cond_.notify_all(); + return true; +} + +bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, + int num_bindings) { + calib_running_.store(false, std::memory_order_release); // wait for new batch + cond_.notify_all(); + while (!calib_running_.load( + std::memory_order_acquire)) { // wait until new batch arrives + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + if (done_) return false; + } + if (done_) { + return false; + } + + for (int i = 0; i < num_bindings; i++) { + auto it = dev_buffers_.find(names[i]); + if (it == dev_buffers_.end()) { + LOG(FATAL) << "Calibration engine asked for unknown tensor name '" + << names[i] << "' at position " << i; + } + + bindings[i] = it->second.first; + } + return true; +} + +const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { + return nullptr; +} + +void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, + std::size_t length) {} +TRTInt8Calibrator::~TRTInt8Calibrator() { + VLOG(1) << "Destroying calibrator for " << engine_name_; +} + +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h new file mode 100644 index 0000000000..8830f7efe7 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -0,0 +1,65 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ + +#include +#include +#include +#include +#include "tensorflow/core/platform/mutex.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" +namespace tensorflow { +namespace tensorrt { +// This class provides a 1 element queue to match TFs push model to +// TRTs pull model for calibration. When TRT implements a means for +// a push calibration This class should be updated accordingly + +struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { + public: + TRTInt8Calibrator( + const std::unordered_map>& dev_buffers, + int batch_size, string engine_name); + int getBatchSize() const override; + bool getBatch(void* bindings[], const char* names[], + int num_bindings) override; + bool setBatch(const std::unordered_map& data); + void setDone() { done_ = true; } + const void* readCalibrationCache(std::size_t& length) override; + void writeCalibrationCache(const void* ptr, std::size_t length) override; + ~TRTInt8Calibrator(); + + private: + const int batch_size_; + tensorflow::mutex cond_mtx_; // mutex for condition_variable + tensorflow::condition_variable cond_; // condition variable to implement + // producer-consumer queue for + // calibration + bool done_; + const std::unordered_map> + dev_buffers_; // map to keep tensorrt input buffers and sizes keyed with + // buffer names + std::atomic_bool calib_running_; + string 
engine_name_; +}; +} // namespace tensorrt +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc new file mode 100644 index 0000000000..e663eed4dd --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc @@ -0,0 +1,39 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace tensorrt { + +std::shared_ptr +tensorflow::tensorrt::TRTResourceManager::getManager(const string& op_name) { + // mutex is held for lookup only. Most instantiations where mutex will be held + // longer will be during op creation and should be ok. 
+ tensorflow::mutex_lock lock(map_mutex_); + auto s = managers_.find(op_name); + if (s == managers_.end()) { + auto it = managers_.emplace( + op_name, std::make_shared(op_name)); + VLOG(1) << "Returning a new manager " << op_name; + return it.first->second; + } + VLOG(1) << "Returning old manager " << op_name; + return s->second; +} + +} // namespace tensorrt +} // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h new file mode 100644 index 0000000000..5f8ad491d3 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCE_MANAGER_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCE_MANAGER_H_ +#include + +#include +#include +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { +namespace tensorrt { + +class TRTResourceManager { + TRTResourceManager() = default; + + public: + static std::shared_ptr instance() { + static std::shared_ptr instance_( + new TRTResourceManager); + return instance_; + } + // returns a manager for given op, if it doesn't exists it creates one + std::shared_ptr getManager(const string& op_name); + + private: + std::unordered_map> + managers_; + tensorflow::mutex map_mutex_; +}; + +} // namespace tensorrt +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCE_TRT_RESOURCE_MANAGER_H_ diff --git a/tensorflow/contrib/tensorrt/resources/trt_resources.h b/tensorflow/contrib/tensorrt/resources/trt_resources.h new file mode 100644 index 0000000000..3c85968ae7 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_resources.h @@ -0,0 +1,95 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ + +#include +#include +#include +#include +#include +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/core/framework/resource_mgr.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { +class TRTCalibrationResource : public tensorflow::ResourceBase { + public: + TRTCalibrationResource() + : calibrator_(nullptr), + builder_(nullptr), + network_(nullptr), + engine_(nullptr), + logger_(nullptr), + thr_(nullptr) {} + string DebugString() override { + std::stringstream oss; + oss << " Calibrator = " << std::hex << calibrator_ << std::dec << std::endl + << " Builder = " << std::hex << builder_ << std::dec << std::endl + << " Network = " << std::hex << network_ << std::dec << std::endl + << " Engine = " << std::hex << engine_ << std::dec << std::endl + << " Logger = " << std::hex << logger_ << std::dec << std::endl + << " Thread = " << std::hex << thr_ << std::dec << std::endl; + return oss.str(); + } + ~TRTCalibrationResource() { + VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString(); + } + TRTInt8Calibrator* calibrator_; + nvinfer1::IBuilder* builder_; + nvinfer1::INetworkDefinition* network_; + nvinfer1::ICudaEngine* engine_; + tensorflow::tensorrt::Logger* logger_; + // TODO(sami): Use threadpool threads! 
+ std::thread* thr_; +}; + +class TRTWeightStore : public tensorflow::ResourceBase { + public: + TRTWeightStore() {} + std::list> store_; + string DebugString() override { + std::stringstream oss; + size_t lenBytes = 0; + for (const auto& v : store_) { + lenBytes += v.size() * sizeof(uint8_t); + } + oss << " Number of entries = " << store_.size() << std::endl + << " Total number of bytes = " + << store_.size() * sizeof(std::vector) + lenBytes << std::endl; + return oss.str(); + } + virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); } +}; + +class TRTEngineResource : public tensorflow::ResourceBase { + public: + TRTEngineResource() : runtime_(nullptr), ctx_(nullptr){}; + string DebugString() override { return string(""); } + nvinfer1::IRuntime* runtime_; + nvinfer1::IExecutionContext* ctx_; +}; + +} // namespace tensorrt +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCEMGR_TRTRESOURCES_H_ +#endif +#endif -- GitLab From 0770b3f963405974692bf0908fcb7db8df81d3f6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 17:28:48 -0800 Subject: [PATCH 237/311] Implement partial constant propagation through IdentityN. PiperOrigin-RevId: 187560028 --- .../grappler/optimizers/constant_folding.cc | 51 ++++++++++++++++++ .../optimizers/constant_folding_test.cc | 53 +++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 32c8a9b2f5..77804142e6 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1843,6 +1843,57 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, continue; } + // Partial constant propagation through IdentityN. 
+ if (IsIdentityN(*node) && NumNonControlInputs(*node) > 0) { + const std::set& tmp = node_map_->GetOutputs(node->name()); + const std::vector consumers(tmp.begin(), tmp.end()); + for (int input_idx = 0; input_idx < node->input_size(); ++input_idx) { + const string& input = node->input(input_idx); + if (IsControlInput(input)) { + break; + } + const NodeDef* input_node = node_map_->GetNode(NodeName(input)); + if (input_node == nullptr) { + LOG(ERROR) << "Bad input: " << input; + break; + } + // Forward constant inputs to outputs and add a control dependency on + // the IdentityN node. + if (IsReallyConstant(*input_node)) { + // Update each consumer. + for (NodeDef* consumer : consumers) { + bool add_dep = false; + for (int consumer_input_idx = 0; + consumer_input_idx < consumer->input_size(); + ++consumer_input_idx) { + const string& consumer_input = + consumer->input(consumer_input_idx); + if (IsControlInput(consumer_input)) { + break; + } + int output_idx; + const string input_node_name = + ParseNodeName(consumer_input, &output_idx); + if (input_node_name == node->name() && output_idx == input_idx) { + consumer->set_input(consumer_input_idx, input); + // We will keep the input from IdentityN through a control + // dependendy, so we only need to add the consumer as an output + // for the constant input node. + node_map_->AddOutput(NodeName(input), consumer->name()); + add_dep = true; + } + } + if (add_dep) { + consumer->add_input(AsControlDependency(node->name())); + } + } + } + } + for (NodeDef* consumer : consumers) { + DedupControlInputs(consumer); + } + } + // Partial constant folding for associative operators: // Split AddN/AccumulateNV2 to enable partial // folding of ops when more than one but not all inputs are constant. 
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 3149e1d53e..29dc93c257 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -1646,6 +1646,59 @@ TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { } } +TEST_F(ConstantFoldingTest, IdenticalN) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + Output x = ops::Placeholder(scope.WithOpName("x"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({}))); + Output c1 = ops::Const(scope.WithOpName("c1"), 1.0f, {2, 2}); + Output c2 = ops::Const(scope.WithOpName("c2"), 2.0f, {2, 2}); + auto id_n = ops::IdentityN(scope.WithOpName("id_n"), {c1, x, c2}); + auto id0 = ops::Identity(scope.WithOpName("id0"), id_n[1]); + auto id1 = ops::Identity(scope.WithOpName("id1"), id_n[0]); + auto add0 = ops::Add(scope.WithOpName("add0"), id_n[0], id_n[1]); + auto add1 = ops::Add(scope.WithOpName("add1"), id_n[0], id_n[2]); + + GrapplerItem item; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + item.fetch.push_back("id0"); + item.fetch.push_back("id1"); + item.fetch.push_back("add0"); + item.fetch.push_back("add1"); + + ConstantFolding fold(nullptr /* cpu_device */); + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + + TF_EXPECT_OK(status); + EXPECT_EQ(8, output.node_size()); + // id_n should remain unchanged. + EXPECT_EQ("id_n", output.node(3).name()); + EXPECT_EQ(3, output.node(3).input_size()); + EXPECT_EQ("c1", output.node(3).input(0)); + EXPECT_EQ("x", output.node(3).input(1)); + EXPECT_EQ("c2", output.node(3).input(2)); + // id0 is unchanged. + EXPECT_EQ("id0", output.node(4).name()); + EXPECT_EQ(1, output.node(4).input_size()); + // id1 should have the constant input forwarded to it, + // and a control dependency from id_n. 
+ EXPECT_EQ("id1", output.node(5).name()); + EXPECT_EQ(2, output.node(5).input_size()); + EXPECT_EQ("c1", output.node(5).input(0)); + EXPECT_EQ("^id_n", output.node(5).input(1)); + + EXPECT_EQ("add0", output.node(6).name()); + EXPECT_EQ(2, output.node(6).input_size()); + EXPECT_EQ("c1", output.node(6).input(0)); + EXPECT_EQ("id_n:1", output.node(6).input(1)); + + EXPECT_EQ("add1", output.node(7).name()); + EXPECT_EQ(3, output.node(7).input_size()); + EXPECT_EQ("c1", output.node(7).input(0)); + EXPECT_EQ("c2", output.node(7).input(1)); + EXPECT_EQ("^id_n", output.node(7).input(2)); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 8a591af6854ee1b010d82d262072b5d3b2cdf7cc Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 1 Mar 2018 17:37:49 -0800 Subject: [PATCH 238/311] Checkpointable: Make Templates Checkpointable Uses a variable_creator_scope to hook all variable creation within the Template. For variables without a more deeply nested Template parent, it adds a dependency directly. For variables with a Template parent, it adds a dependency on the sub-Template instead. The variable scope prefix for the Template itself is stripped. However, any variable_scopes inside the Template (such as those for Layers) will be included in the dependency names. So within Templates we essentially have name-based saving (with the exception of dependencies between Templates themselves, which use the object-based dependency mechanism). This isn't ideal, but will hopefully allow migration toward object oriented dependencies more smoothly. Throws an error on object-based save() if the dependencies don't match between Checkpointable and .variables. Includes a semi-related usability fix for the Checkpoint utility; mostly in unit tests, restore() is not called before save(), which when graph building was leading to the save counter not being initialized. Fixes that. 
PiperOrigin-RevId: 187560911 --- .../eager/python/checkpointable_utils.py | 11 +- .../eager/python/checkpointable_utils_test.py | 80 ++++++++++++ .../python/kernel_tests/template_test.py | 4 + tensorflow/python/ops/template.py | 117 +++++++++++++++++- 4 files changed, 207 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index ed431e02ea..89cd543f77 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -843,10 +843,17 @@ class Checkpoint(core_checkpointable.Checkpointable): def save(self, file_prefix, session=None): """Save a checkpoint. Wraps `tfe.CheckpointableSaver.save`.""" - assign_op = self.save_counter.assign_add(1) - if context.in_graph_mode(): + in_graph_mode = context.in_graph_mode() + if in_graph_mode: if session is None: session = ops.get_default_session() + if self._save_counter is None: + # When graph building, if this is a new save counter variable then it + # needs to be initialized before assign_add. This is only an issue if + # restore() has not been called first. 
+ session.run(self.save_counter.initializer) + assign_op = self.save_counter.assign_add(1) + if in_graph_mode: session.run(assign_op) return self._saver.save( file_prefix=file_prefix, diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 9424de0835..c9db2bcafc 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -34,6 +34,7 @@ from tensorflow.python.layers import core from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops +from tensorflow.python.ops import template from tensorflow.python.ops import variable_scope from tensorflow.python.training import adam from tensorflow.python.training import checkpointable @@ -855,6 +856,85 @@ class CheckpointingTests(test.TestCase): self.assertAllEqual(3., self.evaluate(beta1_power)) +class TemplateTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def test_checkpointable_save_restore(self): + + def _templated(): + v = variable_scope.get_variable( + "v", shape=[1], initializer=init_ops.zeros_initializer()) + v2 = variable_scope.get_variable( + "v2", shape=[1], initializer=init_ops.zeros_initializer()) + return v, v + 1., v2 + + save_template = template.make_template("s1", _templated) + save_root = checkpointable_utils.Checkpoint(my_template=save_template) + v1_save, _, v2_save = save_template() + self.evaluate(v1_save.assign([12.])) + self.evaluate(v2_save.assign([14.])) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = save_root.save(checkpoint_prefix) + + load_template = template.make_template("s2", _templated) + load_root = checkpointable_utils.Checkpoint(my_template=load_template) + status = load_root.restore(save_path) + var, var_plus_one, 
var2 = load_template() + self.assertEqual(2, len(load_template._checkpoint_dependencies)) + self.assertEqual("v", load_template._checkpoint_dependencies[0].name) + self.assertEqual("v2", load_template._checkpoint_dependencies[1].name) + status.assert_consumed().run_restore_ops() + self.assertAllEqual([12.], self.evaluate(var)) + self.assertAllEqual([13.], self.evaluate(var_plus_one)) + self.assertAllEqual([14.], self.evaluate(var2)) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def test_checkpointable_save_restore_nested(self): + + def _inner_template(): + v = variable_scope.get_variable( + "v", shape=[1], initializer=init_ops.zeros_initializer()) + return v + + def _outer_template(): + first_inner = template.make_template("i1", _inner_template) + second_inner = template.make_template("i2", _inner_template) + v1 = first_inner() + v2 = second_inner() + v3 = second_inner() + return (first_inner, second_inner), (v1, v2, v3) + + with variable_scope.variable_scope("ignored"): + save_template = template.make_template("s1", _outer_template) + save_root = checkpointable_utils.Checkpoint(my_template=save_template) + (inner_template_one, inner_template_two), _ = save_template() + self.evaluate(inner_template_one.variables[0].assign([20.])) + self.evaluate(inner_template_two.variables[0].assign([25.])) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = save_root.save(checkpoint_prefix) + + load_template = template.make_template("s2", _outer_template) + load_root = checkpointable_utils.Checkpoint(my_template=load_template) + status = load_root.restore(save_path) + (inner_template_one, inner_template_two), (v1, v2, v3) = load_template() + outer_template_dependencies = load_root.my_template._checkpoint_dependencies + self.assertEqual(2, len(outer_template_dependencies)) + self.assertEqual("i1", outer_template_dependencies[0].name) + self.assertIs(inner_template_one, 
outer_template_dependencies[0].ref) + self.assertEqual("i2", outer_template_dependencies[1].name) + self.assertIs(inner_template_two, outer_template_dependencies[1].ref) + self.assertEqual(1, len(inner_template_one._checkpoint_dependencies)) + self.assertEqual("v", inner_template_one._checkpoint_dependencies[0].name) + self.assertEqual(1, len(inner_template_two._checkpoint_dependencies)) + self.assertEqual("v", inner_template_two._checkpoint_dependencies[0].name) + status.assert_consumed().run_restore_ops() + self.assertAllEqual([20.], self.evaluate(v1)) + self.assertAllEqual([25.], self.evaluate(v2)) + self.assertAllEqual([25.], self.evaluate(v3)) + + class CheckpointCompatibilityTests(test.TestCase): def _initialized_model(self): diff --git a/tensorflow/python/kernel_tests/template_test.py b/tensorflow/python/kernel_tests/template_test.py index a519b69b22..c42ae5a77d 100644 --- a/tensorflow/python/kernel_tests/template_test.py +++ b/tensorflow/python/kernel_tests/template_test.py @@ -356,6 +356,10 @@ class TemplateTest(test.TestCase): self.assertEqual("s1_1/nested/dummy:0", v5.name) self.assertEqual("s1_1/nested_1/dummy:0", v6.name) + self.assertEqual(2, len(tmpl1._checkpoint_dependencies)) + self.assertEqual("nested", tmpl1._checkpoint_dependencies[0].name) + self.assertEqual("nested_1", tmpl1._checkpoint_dependencies[1].name) + @test_util.run_in_graph_and_eager_modes() def test_nested_templates_with_defun(self): diff --git a/tensorflow/python/ops/template.py b/tensorflow/python/ops/template.py index 424582b348..70e8040512 100644 --- a/tensorflow/python/ops/template.py +++ b/tensorflow/python/ops/template.py @@ -26,6 +26,7 @@ from tensorflow.python.eager import function from tensorflow.python.framework import ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util import tf_contextlib from tensorflow.python.util import 
tf_decorator from tensorflow.python.util.deprecation import deprecated @@ -230,7 +231,7 @@ def _skip_common_stack_elements(stacktrace, base_case): return stacktrace[-1:] -class Template(object): +class Template(checkpointable.CheckpointableBase): """Wrap a function to aid in variable sharing. Templates are functions that create variables the first time they are called @@ -294,12 +295,115 @@ class Template(object): # which is not the same as whether the scope has been created. self._variables_created = False + @property + def _checkpoint_dependencies(self): + """Sanity checking for object-based saving. + + Does not override Checkpointable dependency tracking, but checks that + variables accessible through Checkpointable dependencies on other `Template` + objects include all of the variable_scope-filtered `Template.variables`. + + Returns: + A list of checkpointable.CheckpointableReference objects. + Raises: + ValueError: If this object is not compatible with object-based saving. + """ + dependencies = super(Template, self)._checkpoint_dependencies + dependency_variables = [] + for _, dependency in dependencies: + if isinstance(dependency, Template): + dependency_variables.extend(dependency.variables) + else: + dependency_variables.append(dependency) + dependency_variables = set(dependency_variables) + not_included_variables = [] + for expected_variable in sorted(self.variables, key=lambda v: v.name): + if expected_variable not in dependency_variables: + not_included_variables.append(expected_variable) + if not_included_variables: + # Trying to save a Template which improperly tracks its variables. + raise ValueError( + ("The Template '%s' references variables which are not included via " + "object-based dependency tracking. 
Most likely a custom " + "getter/creator was registered which does not call Template's " + "custom variable creator (which is responsible for tracking " + "dependencies).\n\nExpected these variables to be dependencies: %s") + % (self, not_included_variables)) + return dependencies + + def _checkpointable_custom_creator(self, next_creator, name, initial_value, + checkpointable_parent=None, **kwargs): + """A variable creation hook which adds Checkpointable dependencies. + + Set during the `Template`'s first wrapped function execution. Ensures that + (a) `Template` objects depend on `Template`s created inside them which + create variables, and (b) that any variables not in a more deeply nested + `Template` are added as dependencies directly. + + The `checkpointable_parent` argument is passed between `Template` custom + creators but ignored when the variable object itself is created. This + argument indicates (if not `None`) that a more deeply nested `Template` has + already added the variable as a dependency, and that parent `Template`s + should add a dependency on that `Template` rather than on the variable + directly. + + Args: + next_creator: See `variable_scope.variable_creator_scope`; the next + creator in the chain. + name: The (full, scope-influenced) name of the variable. The scope name + for the Template itself is stripped for the purposes of object-based + dependency tracking, but scopes within Templates are respected. + initial_value: See `variable_scope.variable_creator_scope`. Taken + explicitly so the argument can be re-named and used with + `Checkpointable._add_variable_with_custom_getter`. + checkpointable_parent: If not None, a more deeply nested Template object + to add a dependency on (rather than depending on the variable directly). + **kwargs: Passed through to the next creator. + Returns: + The output of `next_creator`: the fetched/created variable object. 
+ """ + def _call_next_creator_renaming_initializer(initializer, **inner_kwargs): + inner_kwargs.pop("name") # Ignored; this is the scope-stripped name which + # we don't want to propagate. + return next_creator( + initial_value=initializer, + name=name, + **inner_kwargs) + if name.startswith(self._variable_scope.name): + scope_stripped_name = name[len(self._variable_scope.name) + 1:] + if not checkpointable_parent: + return self._add_variable_with_custom_getter( + initializer=initial_value, + name=scope_stripped_name, + getter=_call_next_creator_renaming_initializer, + # Disable error checking for Checkpointable. Exceptions are instead + # raised if necessary when the object-based saver tries to + # save/restore the object. + overwrite=True, + checkpointable_parent=self, + **kwargs) + else: + self._track_checkpointable( + checkpointable_parent, + name=checkpointable_parent._variable_scope.name[ # pylint: disable=protected-access + len(self._variable_scope.name) + 1:], + overwrite=True) + return next_creator(name=name, initial_value=initial_value, + checkpointable_parent=self, **kwargs) + def _call_func(self, args, kwargs): try: vars_at_start = len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) trainable_at_start = len( ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) - result = self._func(*args, **kwargs) + if self._variables_created: + result = self._func(*args, **kwargs) + else: + # The first time we run, restore variables if necessary (via + # Checkpointable). 
+ with variable_scope.variable_creator_scope( + self._checkpointable_custom_creator): + result = self._func(*args, **kwargs) if self._variables_created: # Variables were previously created, implying this is not the first @@ -563,7 +667,14 @@ class EagerTemplate(Template): try: vars_at_start = self._template_store.variables() trainable_at_start = self._template_store.trainable_variables() - result = self._func(*args, **kwargs) + if self._variables_created: + result = self._func(*args, **kwargs) + else: + # The first time we run, restore variables if necessary (via + # Checkpointable). + with variable_scope.variable_creator_scope( + self._checkpointable_custom_creator): + result = self._func(*args, **kwargs) if self._variables_created: # Variables were previously created, implying this is not the first -- GitLab From 4669767c4c6d830c2234c3ba15944a362b08fa14 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Thu, 1 Mar 2018 17:41:41 -0800 Subject: [PATCH 239/311] Add util which creates Python callable with tf.Variables explicitly as arguments. 
PiperOrigin-RevId: 187561302 --- tensorflow/contrib/bayesflow/BUILD | 17 -- tensorflow/contrib/bayesflow/__init__.py | 2 - .../kernel_tests/variable_utils_test.py | 135 --------------- .../bayesflow/python/ops/variable_utils.py | 29 ---- .../python/ops/variable_utils_impl.py | 157 ------------------ 5 files changed, 340 deletions(-) delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/variable_utils_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/variable_utils.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/variable_utils_impl.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 270c309ec3..3592cff90b 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -251,23 +251,6 @@ cuda_py_test( tags = ["notsan"], ) -cuda_py_test( - name = "variable_utils_test", - size = "small", - srcs = ["python/kernel_tests/variable_utils_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - ], -) - cuda_py_test( name = "variational_sgd_optimizer_test", size = "small", diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index 528c4fbacd..c411026346 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -30,7 +30,6 @@ from tensorflow.contrib.bayesflow.python.ops import mcmc_diagnostics from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings from tensorflow.contrib.bayesflow.python.ops import monte_carlo from tensorflow.contrib.bayesflow.python.ops import optimizers -from tensorflow.contrib.bayesflow.python.ops import variable_utils # pylint: 
enable=unused-import,line-too-long from tensorflow.python.util.all_util import remove_undocumented @@ -49,7 +48,6 @@ _allowed_symbols = [ 'optimizers', 'special_math', 'stochastic_variables', - 'variable_utils', 'variational_inference', ] diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/variable_utils_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/variable_utils_test.py deleted file mode 100644 index f978cf8641..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/variable_utils_test.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for utility functions related to managing `tf.Variable`s.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import warnings - -import numpy as np - -from tensorflow.contrib.bayesflow.python.ops import variable_utils - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import ops -from tensorflow.python.ops import variable_scope as varscope_ops -from tensorflow.python.ops import variables as variables_ops -from tensorflow.python.platform import test - - -def test_fn(x): - x = ops.convert_to_tensor(x, name="x") - dtype = x.dtype.as_numpy_dtype - s = x.shape.as_list() - z = varscope_ops.get_variable( - name="z", - dtype=dtype, - initializer=np.arange(np.prod(s)).reshape(s).astype(dtype)) - y = varscope_ops.get_variable( - name="y", - dtype=dtype, - initializer=np.arange(np.prod(s)).reshape(s).astype(dtype)**2) - return x + y + z - - -class _WrapCallableTest(object): - - def testDefaultArgsWorkCorrectly(self): - with self.test_session(): - x = constant_op.constant(self.dtype([0.1, 0.2])) - wrapped_fn, vars_args = variable_utils.externalize_variables_as_args( - test_fn, [x]) - - varscope_ops.get_variable_scope().reuse_variables() - - result = wrapped_fn(self.dtype(2), [3, 4, 5], 0.5) - - y_actual = varscope_ops.get_variable("y", dtype=self.dtype) - z_actual = varscope_ops.get_variable("z", dtype=self.dtype) - - variables_ops.global_variables_initializer().run() - result_ = result.eval() - - self.assertEqual(self.dtype, result_.dtype) - self.assertAllEqual([5.5, 6.5, 7.5], result_) - self.assertAllEqual([y_actual, z_actual], vars_args) - - def testNonDefaultArgsWorkCorrectly(self): - with self.test_session(): - x = constant_op.constant(self.dtype([0.1, 0.2])) - - _ = test_fn(self.dtype([0., 0.])) # Needed to create vars. 
- varscope_ops.get_variable_scope().reuse_variables() - - y_actual = varscope_ops.get_variable("y", dtype=self.dtype) - - wrapped_fn, vars_args = variable_utils.externalize_variables_as_args( - test_fn, [x], possible_ancestor_vars=[y_actual]) - - result = wrapped_fn(self.dtype([2, 3]), 0.5) # x, y - - variables_ops.global_variables_initializer().run() - result_ = result.eval() - - self.assertEqual(self.dtype, result_.dtype) - self.assertAllEqual([2.5, 4.5], result_) - self.assertAllEqual([y_actual], vars_args) - - def testWarnings(self): - with self.test_session(): - x = constant_op.constant(self.dtype([0.1, 0.2])) - wrapped_fn, _ = variable_utils.externalize_variables_as_args( - test_fn, [x], possible_ancestor_vars=[]) - varscope_ops.get_variable_scope().reuse_variables() - with warnings.catch_warnings(record=True) as w: - wrapped_fn(self.dtype(2)) - w = sorted(w, key=lambda w: str(w.message)) - self.assertEqual(2, len(w)) - self.assertRegexpMatches( - str(w[0].message), - r"Variable .* 'y:0' .* not found in bypass dict.") - self.assertRegexpMatches( - str(w[1].message), - r"Variable .* 'z:0' .* not found in bypass dict.") - - def testExceptions(self): - with self.test_session(): - x = constant_op.constant(self.dtype([0.1, 0.2])) - wrapped_fn, _ = variable_utils.externalize_variables_as_args( - test_fn, - [x], - possible_ancestor_vars=[], - assert_variable_override=True) - varscope_ops.get_variable_scope().reuse_variables() - with self.assertRaisesRegexp(ValueError, r"not found"): - wrapped_fn(self.dtype(2)) - - -class WrapCallableTest16(test.TestCase, _WrapCallableTest): - dtype = np.float16 - - -class WrapCallableTest32(test.TestCase, _WrapCallableTest): - dtype = np.float32 - - -class WrapCallableTest64(test.TestCase, _WrapCallableTest): - dtype = np.float64 - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/variable_utils.py b/tensorflow/contrib/bayesflow/python/ops/variable_utils.py deleted file mode 100644 
index eadf6f4d5f..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/variable_utils.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility functions related to managing `tf.Variable`s.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# go/tf-wildcard-import -from tensorflow.contrib.bayesflow.python.ops.variable_utils_impl import * # pylint: disable=wildcard-import,unused-wildcard-import,g-importing-member -from tensorflow.python.util import all_util - -_allowed_symbols = [ - "externalize_variables_as_args", -] - -all_util.remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/variable_utils_impl.py b/tensorflow/contrib/bayesflow/python/ops/variable_utils_impl.py deleted file mode 100644 index ca3d75b5bf..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/variable_utils_impl.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility functions related to managing `tf.Variable`s. - -@@externalize_variables_as_args -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import warnings - -from tensorflow.python.framework import ops -from tensorflow.python.ops import gradients_impl as gradients_ops -from tensorflow.python.ops import variable_scope as varscope_ops -from tensorflow.python.ops import variables as variables_ops - -__all__ = [ - "externalize_variables_as_args", -] - - -# Cause all warnings to always be triggered. -# Not having this means subsequent calls wont trigger the warning. -warnings.simplefilter("always") - - -def externalize_variables_as_args(fn, - fn_args=(), - ancestor_variables=None, - possible_ancestor_vars=None, - assert_variable_override=False, - name=None): - """"Converts variables within a callable into explicit args. - - Makes a new callable from `fn` which has arguments `list(fn_args) + - list(ancestor_variables)`. If `ancestor_variables` is not specified, it is - inferred by checking which of `possible_ancestor_vars` actually influences the - return value of `fn` (concretely, gradient of `fn(*fn_args)` is not `None`). - By default `possible_ancestor_vars` is `tf.trainable_variables() + - tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)`. 
- - #### Examples: - - ```python - num_samples = 2 - num_dims = 1 - dtype = np.float32 - - def foo(x): - x = tf.convert_to_tensor(x, dtype=dtype, name="x") - s = x.shape.as_list() - y = tf.get_variable( - name="y", - dtype=dtype, - initializer=np.arange(np.prod(s)).reshape(s).astype(dtype)) - return x + y - - x = tf.constant(dtype([0.1, 0.2])) - - wrapped_foo, discovered_ancestor_variables = ( - externalize_variables_as_args(foo, [x])) - - new_x = dtype([[1.], [2.]]) - new_y = dtype([[3.], [4.]]) - new_result = wrapped_foo(new_x, new_y) - # ==> [[4.], [6.]] - - discovered_ancestor_variables == [tf.get_variable("y", dtype)] - # ==> [True] - ``` - - Args: - fn: Python callable which returns a `Tensor` and accepts `*fn_args`. - fn_args: Python list of args to `fn`. Represents dummy arguments passed to - `fn` to trace its execution; actual values are unimportant. These args are - only used to construct the output of `fn` and to resolve the ancestor - `tf.Variable`s. - Default value: `()` (i.e., `fn` takes no args). - ancestor_variables: Python list of `tf.Variable`s. When `None` the list is - expanded to non-`None` gradients of `fn(*fn_args)`. By directly providing - the `ancestor_variables` the internal call to `fn` is avoided. - Default value: `None` (i.e., `tf.Variable` dependencies are discovered). - possible_ancestor_vars: Python list of possible `tf.Variable`s which might - be a dependency of computing `fn(*fn_args)`. - Default value: `None` (i.e., expanded as described above). - assert_variable_override: Python `bool` indicating that not finding a - `tf.Variable` in the override list is an exception. - Default value: `False` (i.e., missing a `Variable` triggers a `warning`). - name: Python `str` name prefixed to Ops created by this function. - Default value: `None` (i.e., "externalize_variables_as_args"). - - Returns: - wrapped_fn: Python callable taking arguments like - `*(list(fn_args) + discovered_ancestor_variables)`. 
- discovered_ancestor_variables: Python list of `tf.Variable`s known to be a - dependency of `fn(*fn_args)`. - - Raises: - ValueError: if `assert_variable_override` is `True` and `Variable` is - requested but not overridden. - """ - def _make_bypassing_custom_getter_fn(new_var_dict): - """Return dict value rather than what would otherwise be dict key.""" - def _custom_getter(getter, *args, **kwargs): - v = getter(*args, **kwargs) - new_v = new_var_dict.get(v, None) - if new_v is None: - msg = "Variable \"{}\" not found in bypass dict.".format(v) - if assert_variable_override: - raise ValueError(msg) - warnings.warn(msg) - return v - return new_v - return _custom_getter - - with ops.name_scope(name, "externalize_variables_as_args"): - if ancestor_variables is not None and not ancestor_variables: - return fn, () - if ancestor_variables is None: - y = fn(*fn_args) # Side-effect: adds trainable vars. - if possible_ancestor_vars is None: - possible_ancestor_vars = ( - variables_ops.trainable_variables() + - ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) - # TODO(b/72873296): Add a dedicated op for identifying ancestors. - ancestors = [v for g, v - in zip(gradients_ops.gradients(y, possible_ancestor_vars), - possible_ancestor_vars) - if g is not None] - ancestor_variables = sorted(ancestors, key=lambda v: v.name) - n = len(fn_args) - def _fn(*args): - with ops.name_scope("wrapped_fn"): - vars_dict = dict( - (k, ops.convert_to_tensor( - v, dtype=k.dtype.base_dtype, name=k.op.name)) - for k, v in zip(ancestor_variables, args[n:])) - with varscope_ops.variable_scope( - varscope_ops.get_variable_scope(), - reuse=True, - custom_getter=_make_bypassing_custom_getter_fn(vars_dict)): - return fn(*args[:n]) - return _fn, ancestor_variables -- GitLab From e927be3872e00c9b0e5e9aa64e6aae90c4ae1315 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Thu, 1 Mar 2018 17:53:49 -0800 Subject: [PATCH 240/311] Improve CURL error reporting and handling in the GCS filesystem. 
- The main semantics change is that we return immediately if curl_easy_perform doesn't return CURLE_OK. The CURL documentation indicates that it's not ok to fetch info if the curl call failed: https://curl.haxx.se/libcurl/c/curl_easy_getinfo.html - LOG errors where we can't return a status. Otherwise return with a status immediately. PiperOrigin-RevId: 187562481 --- .../core/platform/cloud/curl_http_request.cc | 190 ++++++++++++------ .../core/platform/cloud/curl_http_request.h | 44 +++- .../platform/cloud/curl_http_request_test.cc | 18 +- 3 files changed, 176 insertions(+), 76 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index 80ad1cf0b8..9bc06d56ae 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/version.h" @@ -129,20 +130,34 @@ CurlHttpRequest::CurlHttpRequest(LibCurl* libcurl, Env* env) // default in //third_party:curl.BUILD and can be customized via an // environment variable. - libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, kVerboseOutput); - libcurl_->curl_easy_setopt( - curl_, CURLOPT_USERAGENT, - strings::StrCat("TensorFlow/", TF_VERSION_STRING).c_str()); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, kVerboseOutput), + "Setting verbose output"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt( + curl_, CURLOPT_USERAGENT, + strings::StrCat("TensorFlow/", TF_VERSION_STRING).c_str()), + "Setting user agent"); // Do not use signals for timeouts - does not work in multi-threaded programs. 
- libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L); - libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION, - CURL_HTTP_VERSION_2_0); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L), + "Disabling signals"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION, + CURL_HTTP_VERSION_2_0), + "Setting HTTP version"); // Set up the progress meter. - libcurl_->curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 0ULL); - libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFODATA, this); - libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFOFUNCTION, - &CurlHttpRequest::ProgressCallback); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 0ULL), + "Disabling progress meter"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFODATA, this), + "Setting custom pointer to the progress callback"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFOFUNCTION, + &CurlHttpRequest::ProgressCallback), + "Setting the progress callback"); // If response buffer is not set, libcurl will print results to stdout, // so we always set it. 
@@ -175,13 +190,17 @@ void CurlHttpRequest::SetUri(const string& uri) { CheckNotSent(); is_uri_set_ = true; uri_ = uri; - libcurl_->curl_easy_setopt(curl_, CURLOPT_URL, uri.c_str()); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_URL, uri.c_str()), + "Setting URL"); } void CurlHttpRequest::SetRange(uint64 start, uint64 end) { CheckNotSent(); - libcurl_->curl_easy_setopt(curl_, CURLOPT_RANGE, - strings::StrCat(start, "-", end).c_str()); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_RANGE, + strings::StrCat(start, "-", end).c_str()), + "Setting range"); } void CurlHttpRequest::AddHeader(const string& name, const string& value) { @@ -210,7 +229,9 @@ void CurlHttpRequest::SetDeleteRequest() { CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; - libcurl_->curl_easy_setopt(curl_, CURLOPT_CUSTOMREQUEST, "DELETE"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_CUSTOMREQUEST, "DELETE"), + "Setting delete request"); } Status CurlHttpRequest::SetPutFromFile(const string& body_filepath, @@ -232,9 +253,12 @@ Status CurlHttpRequest::SetPutFromFile(const string& body_filepath, curl_headers_ = libcurl_->curl_slist_append( curl_headers_, strings::StrCat("Content-Length: ", size).c_str()); - libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(put_body_)); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1), "Setting PUT request"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(put_body_)), + "Setting read data"); // Using the default CURLOPT_READFUNCTION, which is doing an fread() on the // FILE * userdata set with CURLOPT_READDATA. 
return Status::OK(); @@ -244,13 +268,18 @@ void CurlHttpRequest::SetPutEmptyBody() { CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; - libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1), "Setting put request"); curl_headers_ = libcurl_->curl_slist_append(curl_headers_, "Content-Length: 0"); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, - &CurlHttpRequest::ReadCallback); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(this)), + "Setting read data"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, + &CurlHttpRequest::ReadCallback), + "Setting read callback"); } void CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) { @@ -259,11 +288,17 @@ void CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) { is_method_set_ = true; curl_headers_ = libcurl_->curl_slist_append( curl_headers_, strings::StrCat("Content-Length: ", size).c_str()); - libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, - &CurlHttpRequest::ReadCallback); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1), + "Setting POST request"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(this)), + "Setting read data"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, + &CurlHttpRequest::ReadCallback), + "Setting read callback"); post_body_buffer_ = StringPiece(buffer, size); } @@ -271,13 +306,19 @@ void CurlHttpRequest::SetPostEmptyBody() { CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; - 
libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1), + "Setting POST request"); curl_headers_ = libcurl_->curl_slist_append(curl_headers_, "Content-Length: 0"); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, - &CurlHttpRequest::ReadCallback); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(this)), + "Setting read data"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, + &CurlHttpRequest::ReadCallback), + "Setting read callback"); } void CurlHttpRequest::SetResultBuffer(std::vector* out_buffer) { @@ -287,10 +328,14 @@ void CurlHttpRequest::SetResultBuffer(std::vector* out_buffer) { out_buffer->clear(); response_buffer_ = out_buffer; - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, - &CurlHttpRequest::WriteCallback); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, + reinterpret_cast(this)), + "Setting write data"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, + &CurlHttpRequest::WriteCallback), + "Setting write callback"); } void CurlHttpRequest::SetResultBufferDirect(char* buffer, size_t size) { @@ -299,10 +344,14 @@ void CurlHttpRequest::SetResultBufferDirect(char* buffer, size_t size) { direct_response_ = DirectResponseState{buffer, size, 0}; - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, - &CurlHttpRequest::WriteCallbackDirect); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, + reinterpret_cast(this)), + "Setting write data"); + 
TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, + &CurlHttpRequest::WriteCallbackDirect), + "Setting write callback"); } bool CurlHttpRequest::IsDirectResponse() const { @@ -424,29 +473,50 @@ Status CurlHttpRequest::Send() { is_sent_ = true; if (curl_headers_) { - libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers_); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers_), + "Setting HTTP header"); } if (resolve_list_) { - libcurl_->curl_easy_setopt(curl_, CURLOPT_RESOLVE, resolve_list_); - } - libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION, - &CurlHttpRequest::HeaderCallback); - - libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, request_timeout_secs_); - libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT, - connect_timeout_secs_); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_RESOLVE, resolve_list_), + "Setting custom resolves"); + } + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERDATA, + reinterpret_cast(this)), + "Setting header data"); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION, + &CurlHttpRequest::HeaderCallback), + "Setting header function"); + + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, request_timeout_secs_), + "Setting request timeout"); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT, + connect_timeout_secs_), + "Setting connection timeout"); char error_buffer[CURL_ERROR_SIZE] = {0}; - libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer), + "Setting error buffer"); - const auto 
curl_result = libcurl_->curl_easy_perform(curl_); + const CURLcode curl_result = libcurl_->curl_easy_perform(curl_); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + curl_result, "Performing request. Detailed error: ", error_buffer); double written_size = 0; - libcurl_->curl_easy_getinfo(curl_, CURLINFO_SIZE_DOWNLOAD, &written_size); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_getinfo(curl_, CURLINFO_SIZE_DOWNLOAD, &written_size), + "Fetching written size"); - libcurl_->curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, &response_code_); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, + &response_code_), + "Fetching response code"); Status result; switch (response_code_) { @@ -616,4 +686,12 @@ int CurlHttpRequest::ProgressCallback(void* this_object, curl_off_t dltotal, return 0; } +Status CURLcodeToStatus(CURLcode code) { + // Return Unavailable to retry by default. We probably should distinguish + // between permanent or temporary failures. 
+ return errors::Unavailable("Error executing an HTTP request (error code ", + code, ", error message '", + curl_easy_strerror(code), "')"); +} + } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h index cfa26f2b79..c9f60cb5fc 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.h +++ b/tensorflow/core/platform/cloud/curl_http_request.h @@ -229,26 +229,28 @@ class LibCurl { virtual CURL* curl_easy_init() = 0; virtual CURLcode curl_easy_setopt(CURL* curl, CURLoption option, - uint64 param) = 0; + uint64 param) TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_setopt(CURL* curl, CURLoption option, - const char* param) = 0; + const char* param) TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_setopt(CURL* curl, CURLoption option, - void* param) = 0; - virtual CURLcode curl_easy_setopt(CURL* curl, CURLoption option, - size_t (*param)(void*, size_t, size_t, - FILE*)) = 0; + void* param) TF_MUST_USE_RESULT = 0; + virtual CURLcode curl_easy_setopt( + CURL* curl, CURLoption option, + size_t (*param)(void*, size_t, size_t, FILE*)) TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_setopt(CURL* curl, CURLoption option, size_t (*param)(const void*, size_t, size_t, - void*)) = 0; + void*)) + TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_setopt( CURL* curl, CURLoption option, int (*param)(void* clientp, curl_off_t dltotal, curl_off_t dlnow, - curl_off_t ultotal, curl_off_t ulnow)) = 0; - virtual CURLcode curl_easy_perform(CURL* curl) = 0; + curl_off_t ultotal, + curl_off_t ulnow)) TF_MUST_USE_RESULT = 0; + virtual CURLcode curl_easy_perform(CURL* curl) TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_getinfo(CURL* curl, CURLINFO info, - uint64* value) = 0; + uint64* value) TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_getinfo(CURL* curl, CURLINFO info, - double* value) = 0; + double* value) TF_MUST_USE_RESULT = 0; virtual void curl_easy_cleanup(CURL* curl) = 0; 
virtual curl_slist* curl_slist_append(curl_slist* list, const char* str) = 0; virtual void curl_slist_free_all(curl_slist* list) = 0; @@ -258,6 +260,26 @@ class LibCurl { virtual const char* curl_easy_strerror(CURLcode errornum) = 0; }; +Status CURLcodeToStatus(CURLcode code); + +#define TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR(_code, ...) \ + do { \ + if (_code != CURLE_OK) { \ + ::tensorflow::Status _status = ::tensorflow::CURLcodeToStatus(_code); \ + ::tensorflow::errors::AppendToMessage(&_status, __VA_ARGS__); \ + return _status; \ + } \ + } while (0) + +#define TF_CURL_LOG_WITH_CONTEXT_IF_ERROR(_code, ...) \ + do { \ + if (_code != CURLE_OK) { \ + ::tensorflow::Status _status = ::tensorflow::CURLcodeToStatus(_code); \ + ::tensorflow::errors::AppendToMessage(&_status, __VA_ARGS__); \ + LOG(ERROR) << "curl error: " << _status.error_message(); \ + } \ + } while (0) + } // namespace tensorflow #endif // TENSORFLOW_CORE_PLATFORM_CLOUD_CURL_HTTP_REQUEST_H_ diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc index 94af121768..4cded9b81b 100644 --- a/tensorflow/core/platform/cloud/curl_http_request_test.cc +++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc @@ -346,7 +346,6 @@ TEST(CurlHttpRequestTest, GetRequest_Empty) { TEST(CurlHttpRequestTest, GetRequest_RangeOutOfBound) { FakeLibCurl libcurl("get response", 416); - libcurl.curl_easy_perform_result_ = CURLE_WRITE_ERROR; CurlHttpRequest http_request(&libcurl); std::vector scratch; @@ -377,10 +376,10 @@ TEST(CurlHttpRequestTest, GetRequest_503) { const auto& status = http_request.Send(); EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( - "Error executing an HTTP request (HTTP response code 503, " - "error code 23, error message ''), response 'get response'", + "Error executing an HTTP request (error code 23, error message 'Failed " + "writing received data to disk/application')\n\tPerforming request. 
" + "Detailed error: ", status.error_message()); - EXPECT_EQ(503, http_request.GetResponseCode()); } TEST(CurlHttpRequestTest, GetRequest_HttpCode0) { @@ -396,9 +395,9 @@ TEST(CurlHttpRequestTest, GetRequest_HttpCode0) { const auto& status = http_request.Send(); EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( - "Error executing an HTTP request (HTTP response code 0, " - "error code 28, error message 'Operation timed out'), " - "response 'get response'", + "Error executing an HTTP request (error code 28, error message 'Timeout " + "was reached')\n\tPerforming request. Detailed error: Operation timed " + "out", status.error_message()); EXPECT_EQ(0, http_request.GetResponseCode()); } @@ -629,8 +628,9 @@ TEST(CurlHttpRequestTest, ProgressIsStuck) { auto status = http_request.Send(); EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( - "Error executing an HTTP request (HTTP response code 200, " - "error code 42, error message ''), response 'test'", + "Error executing an HTTP request (error code 42, error message " + "'Operation was aborted by an application callback')\n\tPerforming " + "request. Detailed error: ", status.error_message()); } -- GitLab From 80a647612e2cc0b98f763ffca1f7f35df7d27805 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 17:58:07 -0800 Subject: [PATCH 241/311] Allow replacing attributes in templates. 
PiperOrigin-RevId: 187562864 --- tensorflow/contrib/py2tf/pyct/templates.py | 11 +++++++++++ .../contrib/py2tf/pyct/templates_test.py | 19 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/pyct/templates.py b/tensorflow/contrib/py2tf/pyct/templates.py index 6ee6c0c5ce..7021e2ba93 100644 --- a/tensorflow/contrib/py2tf/pyct/templates.py +++ b/tensorflow/contrib/py2tf/pyct/templates.py @@ -79,6 +79,17 @@ class ReplaceTransformer(gast.NodeTransformer): else: raise ValueError('unexpected node type "%s"' % node) + def visit_Attribute(self, node): + node = self.generic_visit(node) + if node.attr not in self.replacements: + return node + repl = self.replacements[node.attr] + if not isinstance(repl, gast.Name): + raise ValueError( + 'An attribute can only be replaced by a Name node. Found: %s' % repl) + node.attr = repl.id + return node + def visit_Name(self, node): if node.id not in self.replacements: return node diff --git a/tensorflow/contrib/py2tf/pyct/templates_test.py b/tensorflow/contrib/py2tf/pyct/templates_test.py index 8ccfde8573..0d1c1c5d9e 100644 --- a/tensorflow/contrib/py2tf/pyct/templates_test.py +++ b/tensorflow/contrib/py2tf/pyct/templates_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import imp + import gast from tensorflow.contrib.py2tf.pyct import compiler @@ -62,7 +64,7 @@ class TemplatesTest(test.TestCase): result, _ = compiler.ast_to_object(node) self.assertEquals(7, result.test_fn(2)) - def test_code_block(self): + def test_replace_code_block(self): template = """ def test_fn(a): block @@ -79,6 +81,21 @@ class TemplatesTest(test.TestCase): result, _ = compiler.ast_to_object(node) self.assertEquals(3, result.test_fn(1)) + def test_replace_attribute(self): + template = """ + def test_fn(a): + return a.foo + """ + + node = templates.replace(template, foo='b')[0] + result, _ = compiler.ast_to_object(node) + 
mod = imp.new_module('test') + mod.b = 3 + self.assertEquals(3, result.test_fn(mod)) + + with self.assertRaises(ValueError): + templates.replace(template, foo=1) + if __name__ == '__main__': test.main() -- GitLab From 6d1309419497d52ef9a28df927a0b214cde9507c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 18:03:19 -0800 Subject: [PATCH 242/311] Grappler: Change memory optimizer recomputation name prefix into a regexp. This allows us to match any node names, especially those under different scopes. This still performs a prefix regexp match, so it is basically backwards compatible. PiperOrigin-RevId: 187563544 --- tensorflow/core/BUILD | 1 + tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/memory_optimizer.cc | 20 ++++++++----- .../grappler/optimizers/memory_optimizer.h | 10 +++---- .../grappler/optimizers/meta_optimizer.cc | 4 +-- .../core/protobuf/rewriter_config.proto | 16 +++++----- .../python/grappler/memory_optimizer_test.py | 29 ++++++++++++++++++- 7 files changed, 56 insertions(+), 25 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 3271825251..96e30ca3c0 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2231,6 +2231,7 @@ cc_library( ], visibility = [ "//tensorflow/compiler:__subpackages__", + "//tensorflow/core/grappler:__subpackages__", "//tensorflow/core/profiler:__subpackages__", ], deps = [":lib_internal"], diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 037438ee75..0a4330b524 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -363,6 +363,7 @@ cc_library( ":graph_rewriter", ":static_schedule", "//tensorflow/core:framework", + "//tensorflow/core:regexp_internal", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc 
b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index 694139fa50..d73050ac4d 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/grappler/utils/traversal.h" +#include "tensorflow/core/platform/regexp.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -413,7 +414,7 @@ void RecomputeSubgraph( } void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_prefix, + const string& recomputation_targets_name_regexp, GraphDef* graph, const GrapplerItem& item) { if (optimization_level != RewriterConfig::RECOMPUTATION_HEURISTICS && optimization_level != RewriterConfig::HEURISTICS && @@ -437,16 +438,19 @@ void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, for (const auto& feed : item.feed) { feeds.insert(NodeName(feed.first)); } + RE2 recomputation_targets_re(recomputation_targets_name_regexp); std::function is_target = - [&recomputation_targets_name_prefix](const NodeDef& node) { - // Nodes whose inputs we may want to recompute. Typically targets will - // be gradients (recomputation_targets_name_prefix="gradients/"), - // although the prefix is configurable since gradients may be created - // in a name scope. + [&recomputation_targets_re](const NodeDef& node) { + // Nodes whose inputs we may want to recompute. This does a prefix + // regexp match, and typically one sets regexp="gradients/" meaning + // it will match all node names with scope beginning with "gradients/". + // If used within scopes, one may want to set regexp="(.+/)?gradients/". 
// TODO(allenl): Use a static schedule // (grappler::EstimateEarliestExecutionTimes) to recompute only nodes // whose outputs will sit around for a while. - return node.name().find(recomputation_targets_name_prefix) == 0; + bool match = recomputation_targets_re.Match( + node.name(), 0, node.name().size(), RE2::ANCHOR_START, nullptr, 0); + return match; }; if (optimization_level == RewriterConfig::RECOMPUTATION_HEURISTICS || @@ -1225,7 +1229,7 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph = item.graph; RecomputationRewritingPass(optimization_level_, - recomputation_targets_name_prefix_, + recomputation_targets_name_regexp_, optimized_graph, item); GrapplerItem optimized_item(item, std::move(*optimized_graph)); diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.h b/tensorflow/core/grappler/optimizers/memory_optimizer.h index c3dd0c45c6..62ab969848 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.h +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.h @@ -27,14 +27,14 @@ class MemoryOptimizer : public GraphOptimizer { public: // optimization_level: Controls the level of autonomy for the memory // optimizer. See RewriterConfig::memory_optimization. - // recomputation_targets_name_prefix: Name prefix for potential outputs of + // recomputation_targets_name_regxp: Name regxp for potential outputs of // recomputations. See - // RewriterConfig::memory_optimizer_target_node_name_prefix. + // RewriterConfig::memory_optimizer_target_node_name_regxp. 
explicit MemoryOptimizer( RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_prefix = "gradients/") + const string& recomputation_targets_name_regexp = "gradients/") : optimization_level_(optimization_level), - recomputation_targets_name_prefix_(recomputation_targets_name_prefix) {} + recomputation_targets_name_regexp_(recomputation_targets_name_regexp) {} ~MemoryOptimizer() override {} string name() const override { return "memory_optimizer"; }; @@ -47,7 +47,7 @@ class MemoryOptimizer : public GraphOptimizer { private: RewriterConfig::MemOptType optimization_level_; - string recomputation_targets_name_prefix_; + string recomputation_targets_name_regexp_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 72d7b94dc8..979f3e7161 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -119,7 +119,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, std::unique_ptr(new LayoutOptimizer())); } if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { - if (cfg_.memory_optimizer_target_node_name_prefix().empty()) { + if (cfg_.memory_optimizer_target_node_name_regexp().empty()) { optimizers.push_back(std::unique_ptr( // Use the default target node name prefix "gradients/" new MemoryOptimizer(cfg_.memory_optimization()))); @@ -127,7 +127,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back( std::unique_ptr(new MemoryOptimizer( cfg_.memory_optimization(), - cfg_.memory_optimizer_target_node_name_prefix()))); + cfg_.memory_optimizer_target_node_name_regexp()))); } } if (cfg_.auto_parallel().enable()) { diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 9ebf217811..63303fa968 100644 --- 
a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -78,16 +78,14 @@ message RewriterConfig { // effect on manually requested memory optimization passes in the optimizers // field. MemOptType memory_optimization = 4; - // The prefix for nodes which are valid outputs of recomputations. Inputs to - // nodes with this name prefix may be recomputed (subject either to manual + // A regexp for node names which are valid outputs of recomputations. Inputs + // to nodes that match this regexp may be recomputed (subject either to manual // annotation of those input nodes or to manual annotation and heuristics - // depending on memory_optimization), but the prefixed nodes themselves will - // not be recomputed. Typically this will be "gradients/", indicating that - // activations from the forward pass of a graph may be recomputed as inputs to - // gradients, but may be adjusted if gradients are inside a name scope or if - // inputs to non-gradients should be recomputed. Defaults to "gradients/" if - // empty or not set. - string memory_optimizer_target_node_name_prefix = 6; + // depending on memory_optimization), but the nodes themselves will not be + // recomputed. This is a prefix match, meaning it matches any node name that + // contains a prefix that matches this regexp. Defaults to "gradients/" if + // not provided, but can be changed if used within scopes. + string memory_optimizer_target_node_name_regexp = 6; // Configures AutoParallel optimization passes either through the // meta-optimizer or when manually specified through the optimizers field. 
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py index 948911f099..58d3c1e85f 100644 --- a/tensorflow/python/grappler/memory_optimizer_test.py +++ b/tensorflow/python/grappler/memory_optimizer_test.py @@ -162,7 +162,34 @@ class MemoryOptimizerRecomputeTest(test.TestCase): arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, memory_optimization=rewriter_config_pb2.RewriterConfig. RECOMPUTATION_HEURISTICS, - memory_optimizer_target_node_name_prefix='optimizer/gradients/'), + memory_optimizer_target_node_name_regexp='optimizer/gradients/'), + original_metagraph) + self.assertGreater( + len(rewritten_graph_def.node), + len(original_metagraph.graph_def.node)) + self.assertEqual( + 0, + len([node for node in original_metagraph.graph_def.node + if 'Recomputed/' in node.name])) + self.assertEqual( + 20, # Two per layer + len([node for node in rewritten_graph_def.node + if 'Recomputed/' in node.name])) + + def testRewritingNameScopedGradientNamesRegexp(self): + """Tests that rewriting occurs with non-standard gradient names.""" + (original_metagraph, _, _, _) = self._GetMetaGraph( + optimizer_scope_name='foo/bar') + rewritten_graph_def = tf_optimizer.OptimizeGraph( + rewriter_config_pb2.RewriterConfig( + disable_model_pruning=True, + constant_folding=rewriter_config_pb2.RewriterConfig.OFF, + dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF, + layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF, + arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, + memory_optimization=rewriter_config_pb2.RewriterConfig. 
+ RECOMPUTATION_HEURISTICS, + memory_optimizer_target_node_name_regexp='(.+/)gradients/'), original_metagraph) self.assertGreater( len(rewritten_graph_def.node), -- GitLab From bf1abe945330dffe3f93b81344185f629bef023f Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 1 Mar 2018 18:49:05 -0800 Subject: [PATCH 243/311] [XLA] For graphviz graph dumps that are colored by sharding, choose the fill color for fusion nodes according to the sharding color rather than always choosing grey. PiperOrigin-RevId: 187567679 --- .../compiler/xla/service/hlo_graph_dumper.cc | 104 ++++++++++-------- 1 file changed, 61 insertions(+), 43 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 99c4932a38..1dc72355cf 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -157,52 +157,60 @@ enum ColorScheme { kDashedBorder, }; +// Graphviz attributes/colors that make up a color scheme. 
+struct NodeColors { + const char* style; + const char* fill_color; + const char* stroke_color; + const char* font_color; +}; + +NodeColors NodeColorsForScheme(ColorScheme color) { + switch (color) { + case kBlue: + return NodeColors{"filled", "#bbdefb", "#8aacc8", "black"}; + case kBrown: + return NodeColors{"filled", "#bcaaa4", "#8c7b75", "black"}; + case kDarkBlue: + return NodeColors{"filled", "#1565c0", "#003c8f", "white"}; + case kDarkGreen: + return NodeColors{"filled", "#2e7d32", "#005005", "white"}; + case kDarkRed: + return NodeColors{"filled", "#b71c1c", "#7f0000", "white"}; + case kGray: + return NodeColors{"filled", "#cfd8dc", "#9ea7aa", "black"}; + case kGreen: + return NodeColors{"filled", "#c8e6c9", "#97b498", "black"}; + case kOrange: + return NodeColors{"filled", "#ffe0b2", "#cbae82", "black"}; + case kPurple: + return NodeColors{"filled", "#e1bee7", "#af8eb5", "black"}; + case kRed: + return NodeColors{"filled", "#ffcdd2", "#cb9ca1", "black"}; + case kWhite: + return NodeColors{"filled", "white", "black", "black"}; + case kYellow: + return NodeColors{"filled", "#fff9c4", "#cbc693", "black"}; + case kDashedBorder: + // "filled,dashed" looks the same as "dashed", since we have a white + // background. But we use "filled,dashed" so that when you hover over + // any part of the node (not just the text inside the node), our css + // :hover rule is triggered. + return NodeColors{"filled,dashed", "white", "#757575", "#757575"}; + } +} + // Given a ColorScheme, returns an attribute string for a node of that color. // Sets the node's style and fill/stroke/text colors. // // Colors are from https://material.io/color. 
string NodeColorAttributes(ColorScheme color) { - using std::make_tuple; - - const char *style, *fill_color, *stroke_color, *font_color; - std::tie(style, fill_color, stroke_color, font_color) = [color] { - switch (color) { - case kBlue: - return make_tuple("filled", "#bbdefb", "#8aacc8", "black"); - case kBrown: - return make_tuple("filled", "#bcaaa4", "#8c7b75", "black"); - case kDarkBlue: - return make_tuple("filled", "#1565c0", "#003c8f", "white"); - case kDarkGreen: - return make_tuple("filled", "#2e7d32", "#005005", "white"); - case kDarkRed: - return make_tuple("filled", "#b71c1c", "#7f0000", "white"); - case kGray: - return make_tuple("filled", "#cfd8dc", "#9ea7aa", "black"); - case kGreen: - return make_tuple("filled", "#c8e6c9", "#97b498", "black"); - case kOrange: - return make_tuple("filled", "#ffe0b2", "#cbae82", "black"); - case kPurple: - return make_tuple("filled", "#e1bee7", "#af8eb5", "black"); - case kRed: - return make_tuple("filled", "#ffcdd2", "#cb9ca1", "black"); - case kWhite: - return make_tuple("filled", "white", "black", "black"); - case kYellow: - return make_tuple("filled", "#fff9c4", "#cbc693", "black"); - case kDashedBorder: - // "filled,dashed" looks the same as "dashed", since we have a white - // background. But we use "filled,dashed" so that when you hover over - // any part of the node (not just the text inside the node), our css - // :hover rule is triggered. - return make_tuple("filled,dashed", "white", "#757575", "#757575"); - } - }(); + NodeColors node_colors = NodeColorsForScheme(color); return Printf( - R"(style="%s", fontcolor="%s", color="%s", fillcolor="%s")", style, - font_color, stroke_color, fill_color); + R"(style="%s", fontcolor="%s", color="%s", fillcolor="%s")", + node_colors.style, node_colors.font_color, node_colors.stroke_color, + node_colors.fill_color); } // Replaces <> with <>, so that this string is safe(er) for use in a @@ -604,11 +612,21 @@ tooltip = " "; StrAppend(&subcomp_label, "
", extra_info); } - // Subcomputation's fill/stroke color is light/dark red/gray, depending on - // whether or not the subcomputation's fusion node is highlighted. bool highlight = filter_.Highlight(parent_instr); - const char* fillcolor = highlight ? "#ffcdd2" : "#f5f5f5"; - const char* strokecolor = highlight ? "#b71c1c" : "#c2c2c2"; + const char* fillcolor; + const char* strokecolor; + if (debug_options_.xla_hlo_graph_sharding_color() && !highlight) { + // Use the sharding color, if the node isn't highlighted. + NodeColors node_colors = + NodeColorsForScheme(GetInstructionColor(parent_instr)); + fillcolor = node_colors.fill_color; + strokecolor = node_colors.stroke_color; + } else { + // Subcomputation's fill/stroke color is light/dark red/gray, depending on + // whether or not the subcomputation's fusion node is highlighted. + fillcolor = highlight ? "#ffcdd2" : "#f5f5f5"; + strokecolor = highlight ? "#b71c1c" : "#c2c2c2"; + } style = Printf(R"(style="rounded,filled,bold"; fillcolor="%s"; color="%s;")", fillcolor, strokecolor); -- GitLab From 4735af25c0edfdc012d16a09377161b48839d858 Mon Sep 17 00:00:00 2001 From: Brett Koonce Date: Thu, 1 Mar 2018 21:00:45 -0800 Subject: [PATCH 244/311] minor spelling tweaks for contrib/verbs docs --- tensorflow/contrib/verbs/README.md | 2 +- tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md index 58fed4e5cb..4b6104a8b4 100644 --- a/tensorflow/contrib/verbs/README.md +++ b/tensorflow/contrib/verbs/README.md @@ -93,7 +93,7 @@ When the receiver receives the RDMA write, it will locate the relevant **RdmaTen 1. When the sender receives a tensor request, the source tensor may or may not be ready yet. 
The situation is handled through a process of tag matching: * If the request arrives before the tensor is ready, then a callback is put in a local table, and will be invoked once the tensor arrives. - * If the tensor is ready before the request arives, than the tensor is put in a local table. When the request arrives, it will invoke the callback immediately. + * If the tensor is ready before the request arrives, than the tensor is put in a local table. When the request arrives, it will invoke the callback immediately. In code it is done by calling **RecvLocalAsync()**, which receives the tensor's key, step-id, and the callback. 2. When the callback is invoked, the relevant tensor is removed from the tag matching table. In the case where we need to send the tensor's meta-data, the **RdmaTensorResponse** will store a copy of the tensor until the re-request arrives. 3. The sending of protocol messages (**RDMA_MESSAGE_TENSOR_REQUEST**, **RDMA_MESSAGE_META_DATA_RESPONSE** and **RDMA_MESSAGE_TENSOR_RE_REQUEST**) is done by the class **RdmaMessageBuffer**. All messages are sent using RDMA writes from/to fixed messages buffers. This implies that we cannot send on a specific channel more than one message at a time. In order to synchronize the messages, the **RdmaMessageBuffer** holds the a local and remote buffer statuses which can be either busy or idle. When a write is issued, both statuses will be changed to busy. When the write-complete event is received, the local status is changed to idle. When the write is received on the remote side, the remote side will parse the message, and return an ACK back to the sending side on which the sending side will update the remote status to idle. When both the local and remote statuses are idle, the next message can be sent. 
diff --git a/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md b/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md index 956b8f2147..da6fdd48e1 100644 --- a/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md +++ b/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md @@ -64,7 +64,7 @@ The protocol messages themselves will remain mostly unchanged at the first stage * type - The message type. * request_index - Request index. * is_dead/data_type/tensor_shape/tensor_bytes - The up-to-date meta-data. -* **RDMA_MESSAGE_BUFFER_RESPONSE** - (receiver ==> sender) Tensor re-requset after meta-data update and reallocation of result/proxy tensors. +* **RDMA_MESSAGE_BUFFER_RESPONSE** - (receiver ==> sender) Tensor re-request after meta-data update and reallocation of result/proxy tensors. * type - The message type. * name (name_size) - Name of the requested tensor. * step_id - Step ID. -- GitLab From 1401b731cc2df2ca48117216b5f91c9f2070ae3c Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 1 Mar 2018 22:25:41 -0800 Subject: [PATCH 245/311] Automated g4 rollback of changelist 187563544 PiperOrigin-RevId: 187582263 --- tensorflow/core/BUILD | 1 - tensorflow/core/grappler/optimizers/BUILD | 1 - .../grappler/optimizers/memory_optimizer.cc | 20 +++++-------- .../grappler/optimizers/memory_optimizer.h | 10 +++---- .../grappler/optimizers/meta_optimizer.cc | 4 +-- .../core/protobuf/rewriter_config.proto | 16 +++++----- .../python/grappler/memory_optimizer_test.py | 29 +------------------ 7 files changed, 25 insertions(+), 56 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 96e30ca3c0..3271825251 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2231,7 +2231,6 @@ cc_library( ], visibility = [ "//tensorflow/compiler:__subpackages__", - "//tensorflow/core/grappler:__subpackages__", "//tensorflow/core/profiler:__subpackages__", ], deps = [":lib_internal"], diff --git 
a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 0a4330b524..037438ee75 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -363,7 +363,6 @@ cc_library( ":graph_rewriter", ":static_schedule", "//tensorflow/core:framework", - "//tensorflow/core:regexp_internal", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index d73050ac4d..694139fa50 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -36,7 +36,6 @@ limitations under the License. #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/grappler/utils/traversal.h" -#include "tensorflow/core/platform/regexp.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -414,7 +413,7 @@ void RecomputeSubgraph( } void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_regexp, + const string& recomputation_targets_name_prefix, GraphDef* graph, const GrapplerItem& item) { if (optimization_level != RewriterConfig::RECOMPUTATION_HEURISTICS && optimization_level != RewriterConfig::HEURISTICS && @@ -438,19 +437,16 @@ void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, for (const auto& feed : item.feed) { feeds.insert(NodeName(feed.first)); } - RE2 recomputation_targets_re(recomputation_targets_name_regexp); std::function is_target = - [&recomputation_targets_re](const NodeDef& node) { - // Nodes whose inputs we may want to recompute. 
This does a prefix - // regexp match, and typically one sets regexp="gradients/" meaning - // it will match all node names with scope beginning with "gradients/". - // If used within scopes, one may want to set regexp="(.+/)?gradients/". + [&recomputation_targets_name_prefix](const NodeDef& node) { + // Nodes whose inputs we may want to recompute. Typically targets will + // be gradients (recomputation_targets_name_prefix="gradients/"), + // although the prefix is configurable since gradients may be created + // in a name scope. // TODO(allenl): Use a static schedule // (grappler::EstimateEarliestExecutionTimes) to recompute only nodes // whose outputs will sit around for a while. - bool match = recomputation_targets_re.Match( - node.name(), 0, node.name().size(), RE2::ANCHOR_START, nullptr, 0); - return match; + return node.name().find(recomputation_targets_name_prefix) == 0; }; if (optimization_level == RewriterConfig::RECOMPUTATION_HEURISTICS || @@ -1229,7 +1225,7 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph = item.graph; RecomputationRewritingPass(optimization_level_, - recomputation_targets_name_regexp_, + recomputation_targets_name_prefix_, optimized_graph, item); GrapplerItem optimized_item(item, std::move(*optimized_graph)); diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.h b/tensorflow/core/grappler/optimizers/memory_optimizer.h index 62ab969848..c3dd0c45c6 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.h +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.h @@ -27,14 +27,14 @@ class MemoryOptimizer : public GraphOptimizer { public: // optimization_level: Controls the level of autonomy for the memory // optimizer. See RewriterConfig::memory_optimization. - // recomputation_targets_name_regxp: Name regxp for potential outputs of + // recomputation_targets_name_prefix: Name prefix for potential outputs of // recomputations. 
See - // RewriterConfig::memory_optimizer_target_node_name_regxp. + // RewriterConfig::memory_optimizer_target_node_name_prefix. explicit MemoryOptimizer( RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_regexp = "gradients/") + const string& recomputation_targets_name_prefix = "gradients/") : optimization_level_(optimization_level), - recomputation_targets_name_regexp_(recomputation_targets_name_regexp) {} + recomputation_targets_name_prefix_(recomputation_targets_name_prefix) {} ~MemoryOptimizer() override {} string name() const override { return "memory_optimizer"; }; @@ -47,7 +47,7 @@ class MemoryOptimizer : public GraphOptimizer { private: RewriterConfig::MemOptType optimization_level_; - string recomputation_targets_name_regexp_; + string recomputation_targets_name_prefix_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 979f3e7161..72d7b94dc8 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -119,7 +119,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, std::unique_ptr(new LayoutOptimizer())); } if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { - if (cfg_.memory_optimizer_target_node_name_regexp().empty()) { + if (cfg_.memory_optimizer_target_node_name_prefix().empty()) { optimizers.push_back(std::unique_ptr( // Use the default target node name prefix "gradients/" new MemoryOptimizer(cfg_.memory_optimization()))); @@ -127,7 +127,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back( std::unique_ptr(new MemoryOptimizer( cfg_.memory_optimization(), - cfg_.memory_optimizer_target_node_name_regexp()))); + cfg_.memory_optimizer_target_node_name_prefix()))); } } if (cfg_.auto_parallel().enable()) { diff --git 
a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 63303fa968..9ebf217811 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -78,14 +78,16 @@ message RewriterConfig { // effect on manually requested memory optimization passes in the optimizers // field. MemOptType memory_optimization = 4; - // A regexp for node names which are valid outputs of recomputations. Inputs - // to nodes that match this regexp may be recomputed (subject either to manual + // The prefix for nodes which are valid outputs of recomputations. Inputs to + // nodes with this name prefix may be recomputed (subject either to manual // annotation of those input nodes or to manual annotation and heuristics - // depending on memory_optimization), but the nodes themselves will not be - // recomputed. This is a prefix match, meaning it matches any node name that - // contains a prefix that matches this regexp. Defaults to "gradients/" if - // not provided, but can be changed if used within scopes. - string memory_optimizer_target_node_name_regexp = 6; + // depending on memory_optimization), but the prefixed nodes themselves will + // not be recomputed. Typically this will be "gradients/", indicating that + // activations from the forward pass of a graph may be recomputed as inputs to + // gradients, but may be adjusted if gradients are inside a name scope or if + // inputs to non-gradients should be recomputed. Defaults to "gradients/" if + // empty or not set. + string memory_optimizer_target_node_name_prefix = 6; // Configures AutoParallel optimization passes either through the // meta-optimizer or when manually specified through the optimizers field. 
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py index 58d3c1e85f..948911f099 100644 --- a/tensorflow/python/grappler/memory_optimizer_test.py +++ b/tensorflow/python/grappler/memory_optimizer_test.py @@ -162,34 +162,7 @@ class MemoryOptimizerRecomputeTest(test.TestCase): arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, memory_optimization=rewriter_config_pb2.RewriterConfig. RECOMPUTATION_HEURISTICS, - memory_optimizer_target_node_name_regexp='optimizer/gradients/'), - original_metagraph) - self.assertGreater( - len(rewritten_graph_def.node), - len(original_metagraph.graph_def.node)) - self.assertEqual( - 0, - len([node for node in original_metagraph.graph_def.node - if 'Recomputed/' in node.name])) - self.assertEqual( - 20, # Two per layer - len([node for node in rewritten_graph_def.node - if 'Recomputed/' in node.name])) - - def testRewritingNameScopedGradientNamesRegexp(self): - """Tests that rewriting occurs with non-standard gradient names.""" - (original_metagraph, _, _, _) = self._GetMetaGraph( - optimizer_scope_name='foo/bar') - rewritten_graph_def = tf_optimizer.OptimizeGraph( - rewriter_config_pb2.RewriterConfig( - disable_model_pruning=True, - constant_folding=rewriter_config_pb2.RewriterConfig.OFF, - dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF, - layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF, - arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, - memory_optimization=rewriter_config_pb2.RewriterConfig. - RECOMPUTATION_HEURISTICS, - memory_optimizer_target_node_name_regexp='(.+/)gradients/'), + memory_optimizer_target_node_name_prefix='optimizer/gradients/'), original_metagraph) self.assertGreater( len(rewritten_graph_def.node), -- GitLab From 974822bcde764eb6a0b1498a575fdde7001aae15 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 2 Mar 2018 01:17:19 -0800 Subject: [PATCH 246/311] [XLA:GPU] Extract multiplication of complex numbers into a helper function. Also add helper functions for getting the real and the imaginary part of a complex number. PiperOrigin-RevId: 187593341 --- .../compiler/xla/service/gpu/ir_emitter.cc | 65 +++++++++++-------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index a3df67a873..1e0db2821a 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include #include "tensorflow/core/platform/logging.h" // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc" @@ -438,6 +439,32 @@ Status IrEmitter::HandleSelect(HloInstruction* select) { return IrEmitter::DefaultAction(select); } +namespace { +llvm::Value* Real(llvm::Value* x, llvm::IRBuilder<>* ir_builder) { + return ir_builder->CreateExtractValue(x, {0}); +} + +llvm::Value* Imag(llvm::Value* x, llvm::IRBuilder<>* ir_builder) { + return ir_builder->CreateExtractValue(x, {1}); +} + +std::pair MultiplyComplex( + llvm::Value* lhs_value, llvm::Value* rhs_value, + llvm::IRBuilder<>* ir_builder) { + llvm::Value* lhs_real = Real(lhs_value, ir_builder); + llvm::Value* lhs_imag = Imag(lhs_value, ir_builder); + llvm::Value* rhs_real = Real(rhs_value, ir_builder); + llvm::Value* rhs_imag = Imag(rhs_value, ir_builder); + llvm::Value* real_result1 = ir_builder->CreateFMul(lhs_real, rhs_real); + llvm::Value* real_result2 = ir_builder->CreateFMul(lhs_imag, rhs_imag); + llvm::Value* real_result = ir_builder->CreateFSub(real_result1, real_result2); + llvm::Value* imag_result1 = ir_builder->CreateFMul(lhs_real, rhs_imag); + llvm::Value* imag_result2 = ir_builder->CreateFMul(lhs_imag, rhs_real); + llvm::Value* imag_result = ir_builder->CreateFAdd(imag_result1, 
imag_result2); + return {real_result, imag_result}; +} +} // namespace + Status IrEmitter::HandleDot(HloInstruction* dot) { auto lhs_instruction = dot->operand(0); auto rhs_instruction = dot->operand(1); @@ -456,21 +483,10 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { rhs_array.EmitReadArrayElement(/*index=*/{}, &ir_builder_); llvm::Value* result; if (ShapeUtil::ElementIsComplex(lhs_shape)) { - auto real = [&](llvm::Value* x) { - return ir_builder_.CreateExtractValue(x, {0}); - }; - auto imag = [&](llvm::Value* x) { - return ir_builder_.CreateExtractValue(x, {1}); - }; - llvm::Value* real_result = ir_builder_.CreateFSub( - ir_builder_.CreateFMul(real(lhs_value), real(rhs_value)), - ir_builder_.CreateFMul(imag(lhs_value), imag(rhs_value))); - llvm::Value* imag_result = ir_builder_.CreateFAdd( - ir_builder_.CreateFMul(real(lhs_value), imag(rhs_value)), - ir_builder_.CreateFMul(imag(lhs_value), real(rhs_value))); + auto value = MultiplyComplex(lhs_value, rhs_value, &ir_builder_); result = llvm::ConstantAggregateZero::get(lhs_array.GetElementLlvmType()); - result = ir_builder_.CreateInsertValue(result, real_result, {0}); - result = ir_builder_.CreateInsertValue(result, imag_result, {1}); + result = ir_builder_.CreateInsertValue(result, value.first, {0}); + result = ir_builder_.CreateInsertValue(result, value.second, {1}); } else { result = ir_builder_.CreateFMul(lhs_value, rhs_value); } @@ -548,20 +564,13 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { llvm::Value* accum = ir_builder_.CreateLoad(accum_address); llvm::Value* updated_accum; if (ShapeUtil::ElementIsComplex(lhs_shape)) { -#define REAL(x) ir_builder_.CreateExtractValue(x, {0}) -#define IMAG(x) ir_builder_.CreateExtractValue(x, {1}) - llvm::Value* product_real = ir_builder_.CreateFSub( - ir_builder_.CreateFMul(REAL(lhs_element), REAL(rhs_element)), - ir_builder_.CreateFMul(IMAG(lhs_element), IMAG(rhs_element))); - llvm::Value* product_imag = ir_builder_.CreateFAdd( - 
ir_builder_.CreateFMul(REAL(lhs_element), IMAG(rhs_element)), - ir_builder_.CreateFMul(IMAG(lhs_element), REAL(rhs_element))); - updated_accum = ir_builder_.CreateInsertValue( - accum, ir_builder_.CreateFAdd(REAL(accum), product_real), {0}); - updated_accum = ir_builder_.CreateInsertValue( - updated_accum, ir_builder_.CreateFAdd(IMAG(accum), product_imag), {1}); -#undef IMAG -#undef REAL + auto value = MultiplyComplex(lhs_element, rhs_element, &ir_builder_); + llvm::Value* accum_real = Real(accum, &ir_builder_); + llvm::Value* real_sum = ir_builder_.CreateFAdd(accum_real, value.first); + updated_accum = ir_builder_.CreateInsertValue(accum, real_sum, {0}); + llvm::Value* accum_imag = Imag(accum, &ir_builder_); + llvm::Value* imag_sum = ir_builder_.CreateFAdd(accum_imag, value.second); + updated_accum = ir_builder_.CreateInsertValue(updated_accum, imag_sum, {1}); } else { llvm::Value* product = ir_builder_.CreateFMul(lhs_element, rhs_element); updated_accum = ir_builder_.CreateFAdd(accum, product); -- GitLab From 353dbff0cbabe8d8b38530b13669271b4d047c9b Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 2 Mar 2018 01:48:59 -0800 Subject: [PATCH 247/311] Java: Update to 1.6.0 PiperOrigin-RevId: 187595636 --- tensorflow/java/maven/libtensorflow/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml | 2 +- tensorflow/java/maven/pom.xml | 2 +- tensorflow/java/maven/proto/pom.xml | 2 +- tensorflow/java/maven/tensorflow/pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml index d35bb41112..1c84eae540 100644 --- a/tensorflow/java/maven/libtensorflow/pom.xml +++ b/tensorflow/java/maven/libtensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 ../ libtensorflow diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml 
index d9ba1bbbfb..cf1a7b6c9c 100644 --- a/tensorflow/java/maven/libtensorflow_jni/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 ../ libtensorflow_jni diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml index f6f532c2c1..b202dcd5c7 100644 --- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 ../ libtensorflow_jni_gpu diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 0a6b3d23d7..606805ff33 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 pom https://www.tensorflow.org diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml index 1d8e872373..c6bba4e536 100644 --- a/tensorflow/java/maven/proto/pom.xml +++ b/tensorflow/java/maven/proto/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 ../ proto diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml index 5c1b55085c..a22663f9f3 100644 --- a/tensorflow/java/maven/tensorflow/pom.xml +++ b/tensorflow/java/maven/tensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 ../ tensorflow -- GitLab From 2d3e25245ec4dc2b791212b65b17a7ff4051dfe3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 05:50:55 -0800 Subject: [PATCH 248/311] Add support to convert ResourceVariables of graphs into constants. This involves a change to the implementation of convert_variables_to_constants. 
PiperOrigin-RevId: 187610062 --- tensorflow/python/BUILD | 1 + .../python/framework/graph_util_impl.py | 18 ++- .../python/framework/graph_util_test.py | 106 ++++++++++-------- 3 files changed, 76 insertions(+), 49 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index b0cb48c80c..fbdf15a69f 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3654,6 +3654,7 @@ py_test( ":framework_for_generated_wrappers", ":math_ops", ":state_ops_gen", + ":variable_scope", ":variables", "//tensorflow/core:protos_all_py", ], diff --git a/tensorflow/python/framework/graph_util_impl.py b/tensorflow/python/framework/graph_util_impl.py index 5a543317e6..910364364c 100644 --- a/tensorflow/python/framework/graph_util_impl.py +++ b/tensorflow/python/framework/graph_util_impl.py @@ -235,7 +235,7 @@ def convert_variables_to_constants(sess, variable_names = [] variable_dict_names = [] for node in inference_graph.node: - if node.op in ["Variable", "VariableV2"]: + if node.op in ["Variable", "VariableV2", "VarHandleOp"]: variable_name = node.name if ((variable_names_whitelist is not None and variable_name not in variable_names_whitelist) or @@ -243,7 +243,10 @@ def convert_variables_to_constants(sess, variable_name in variable_names_blacklist)): continue variable_dict_names.append(variable_name) - variable_names.append(variable_name + ":0") + if node.op == "VarHandleOp": + variable_names.append(variable_name + "/Read/ReadVariableOp:0") + else: + variable_names.append(variable_name + ":0") if variable_names: returned_variables = sess.run(variable_names) else: @@ -266,6 +269,17 @@ def convert_variables_to_constants(sess, tensor=tensor_util.make_tensor_proto( data, dtype=dtype.type, shape=data.shape))) how_many_converted += 1 + elif input_node.op == "ReadVariableOp" and ( + input_node.input[0] in found_variables): + # The preceding branch converts all VarHandleOps of ResourceVariables to + # constants, so we need to convert the associated 
ReadVariableOps to + # Identity ops. + output_node.op = "Identity" + output_node.name = input_node.name + output_node.input.extend([input_node.input[0]]) + output_node.attr["T"].CopyFrom(input_node.attr["dtype"]) + if "_class" in input_node.attr: + output_node.attr["_class"].CopyFrom(input_node.attr["_class"]) else: output_node.CopyFrom(input_node) output_graph_def.node.extend([output_node]) diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py index 1cdd738198..b618152b02 100644 --- a/tensorflow/python/framework/graph_util_test.py +++ b/tensorflow/python/framework/graph_util_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.ops import gen_state_ops from tensorflow.python.ops import math_ops # pylint: disable=unused-import from tensorflow.python.ops import math_ops as math_ops_lib +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -226,52 +227,62 @@ class DeviceFunctionsTest(test.TestCase): constant_graph_def.library) def testConvertVariablesToConsts(self): - with ops.Graph().as_default(): - variable_node = variables.Variable(1.0, name="variable_node") - _ = variables.Variable(1.0, name="unused_variable_node") - output_node = math_ops_lib.multiply( - variable_node, 2.0, name="output_node") - with session.Session() as sess: - init = variables.initialize_variables([variable_node]) - sess.run(init) - output = sess.run(output_node) - self.assertNear(2.0, output, 0.00001) - variable_graph_def = sess.graph.as_graph_def() - # First get the constant_graph_def when variable_names_whitelist is set, - # note that if variable_names_whitelist is not set an error will be - # thrown because unused_variable_node is not initialized. 
- constant_graph_def = graph_util.convert_variables_to_constants( - sess, - variable_graph_def, ["output_node"], - variable_names_whitelist=set(["variable_node"])) + self._test_variable_to_const_conversion(use_resource=False) - # Then initialize the unused variable, and get another - # constant_graph_def when variable_names_whitelist is not set. - sess.run(variables.global_variables_initializer()) - constant_graph_def_without_variable_whitelist = ( - graph_util.convert_variables_to_constants(sess, variable_graph_def, - ["output_node"])) - - # The unused variable should be cleared so the two graphs should be - # equivalent. - self.assertEqual( - str(constant_graph_def), - str(constant_graph_def_without_variable_whitelist)) - - # Test variable name black list. This should result in the variable not - # being a const. - sess.run(variables.global_variables_initializer()) - constant_graph_def_with_blacklist = ( - graph_util.convert_variables_to_constants( - sess, - variable_graph_def, ["output_node"], - variable_names_blacklist=set(["variable_node"]))) - variable_node = None - for node in constant_graph_def_with_blacklist.node: - if node.name == "variable_node": - variable_node = node - self.assertIsNotNone(variable_node) - self.assertEqual(variable_node.op, "VariableV2") + def testConvertResourceVariablesToConsts(self): + self._test_variable_to_const_conversion(use_resource=True) + + def _test_variable_to_const_conversion(self, use_resource): + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=use_resource): + variable_node = variable_scope.get_variable( + "variable_node", initializer=1.0) + another_variable = variable_scope.get_variable( + "unused_variable_node", initializer=1.0) + output_node = math_ops_lib.multiply( + variable_node, 2.0, name="output_node") + with session.Session() as sess: + sess.run(variable_node.initializer) + output = sess.run(output_node) + self.assertNear(2.0, output, 0.00001) + variable_graph_def = 
sess.graph.as_graph_def() + # First get the constant_graph_def when variable_names_whitelist is + # set, note that if variable_names_whitelist is not set an error will + # be thrown because unused_variable_node is not initialized. + constant_graph_def = graph_util.convert_variables_to_constants( + sess, + variable_graph_def, ["output_node"], + variable_names_whitelist=set(["variable_node"])) + + # Then initialize the unused variable, and get another + # constant_graph_def when variable_names_whitelist is not set. + sess.run(another_variable.initializer) + constant_graph_def_without_variable_whitelist = ( + graph_util.convert_variables_to_constants( + sess, variable_graph_def, ["output_node"])) + + # The unused variable should be cleared so the two graphs should be + # equivalent. + self.assertEqual( + str(constant_graph_def), + str(constant_graph_def_without_variable_whitelist)) + + # Test variable name black list. This should result in the variable + # not being a const. + constant_graph_def_with_blacklist = ( + graph_util.convert_variables_to_constants( + sess, + variable_graph_def, ["output_node"], + variable_names_blacklist=set(["variable_node"]))) + variable_node = None + for node in constant_graph_def_with_blacklist.node: + if node.name == "variable_node": + variable_node = node + self.assertIsNotNone(variable_node) + if use_resource: + self.assertEqual(variable_node.op, "VarHandleOp") + else: + self.assertEqual(variable_node.op, "VariableV2") # Now we make sure the variable is now a constant, and that the graph still # produces the expected result. 
@@ -279,8 +290,9 @@ class DeviceFunctionsTest(test.TestCase): _ = importer.import_graph_def(constant_graph_def, name="") self.assertEqual(4, len(constant_graph_def.node)) for node in constant_graph_def.node: - self.assertNotEqual("Variable", node.op) - self.assertNotEqual("VariableV2", node.op) + self.assertNotIn( + node.op, + ["Variable", "VariableV2", "VarHandleOp", "ReadVariableOp"]) with session.Session() as sess: output_node = sess.graph.get_tensor_by_name("output_node:0") output = sess.run(output_node) -- GitLab From 95be42c41c77aed8dd811398332687f45105c926 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 2 Mar 2018 10:18:40 -0500 Subject: [PATCH 249/311] Remove underscore prefix from gen_array_ops._unique_with_counts --- tensorflow/python/ops/array_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index e537787398..e0bcac0641 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1326,10 +1326,10 @@ def unique_with_counts(x, out_idx=dtypes.int32, name=None): # period (3 weeks) pass. # TODO(yongtang): The documentation should also # be updated when switch to v2. 
- return gen_array_ops._unique_with_counts(x, out_idx, name) + return gen_array_ops.unique_with_counts(x, out_idx, name) -unique_with_counts.__doc__ = gen_array_ops._unique_with_counts.__doc__ +unique_with_counts.__doc__ = gen_array_ops.unique_with_counts.__doc__ @tf_export("split") -- GitLab From 7b7ce88a073530dd3ea6ec5ee329fb45dd64b06b Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 2 Mar 2018 10:32:27 -0500 Subject: [PATCH 250/311] Remove underscore prefix from gen_array_ops._unique_with_counts_v2 --- tensorflow/python/kernel_tests/unique_op_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index 3c9650ef6e..bbc040dc13 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -137,10 +137,10 @@ class UniqueWithCountsTest(test.TestCase): for dtype in [np.int32, np.int64]: x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]]) with self.test_session() as sess: - y0, idx0, count0 = gen_array_ops._unique_with_counts_v2( + y0, idx0, count0 = gen_array_ops.unique_with_counts_v2( x, axis=np.array([0], dtype)) tf_y0, tf_idx0, tf_count0 = sess.run([y0, idx0, count0]) - y1, idx1, count1 = gen_array_ops._unique_with_counts_v2( + y1, idx1, count1 = gen_array_ops.unique_with_counts_v2( x, axis=np.array([1], dtype)) tf_y1, tf_idx1, tf_count1 = sess.run([y1, idx1, count1]) self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]])) @@ -155,7 +155,7 @@ class UniqueWithCountsTest(test.TestCase): # by default, the axis will be wrapped to allow `axis=None`. 
x = np.random.randint(2, high=10, size=7000) with self.test_session() as sess: - y, idx, count = gen_array_ops._unique_with_counts_v2( + y, idx, count = gen_array_ops.unique_with_counts_v2( x, axis=np.array([], np.int32)) tf_y, tf_idx, tf_count = sess.run([y, idx, count]) -- GitLab From 60740a489475365815c50d5b0d3c352d420454ab Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 08:20:27 -0800 Subject: [PATCH 251/311] Eliminate the creation of unnecessary read ops when working with ResourceVariables. In particular: 1. Don't create additional read ops when creating a ResourceVariable from a VariableDef proto. 2. Expose the ability to assign a ResourceVariable without reading & returning the new value. 3. Colocating with a ResourceVariable's ".op" property eliminates the creation of additional read ops. 4. Savers can read a variable's value using the _graph_element property, since these reads don't need control dependencies. This makes the visualization of graphs on TensorBoard much nicer. 
PiperOrigin-RevId: 187622122 --- tensorflow/contrib/framework/BUILD | 1 + tensorflow/python/BUILD | 1 + .../python/framework/meta_graph_test.py | 14 ---- .../resource_variable_ops_test.py | 45 ++++++++++ .../python/ops/resource_variable_ops.py | 82 +++++++++++++++---- .../python/training/checkpoint_utils.py | 9 +- .../python/training/checkpoint_utils_test.py | 26 ++++++ tensorflow/python/training/saver.py | 10 ++- tensorflow/python/training/saver_test.py | 18 ++++ 9 files changed, 171 insertions(+), 35 deletions(-) diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index 50868c6d6c..ac043fda06 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -62,6 +62,7 @@ tf_custom_op_py_library( "//tensorflow/python:math_ops", "//tensorflow/python:platform", "//tensorflow/python:pywrap_tensorflow", + "//tensorflow/python:resource_variable_ops", "//tensorflow/python:script_ops", "//tensorflow/python:smart_cond", "//tensorflow/python:sparse_tensor", diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index fbdf15a69f..cb54cebf0f 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3954,6 +3954,7 @@ py_test( ":partitioned_variables", ":platform", ":pywrap_tensorflow", + ":resource_variable_ops", ":state_ops", ":training", ":variable_scope", diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index 19dcd6a1b3..21963d0bee 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -905,20 +905,6 @@ class ExportImportAcrossScopesTest(test.TestCase): with variable_scope.variable_scope("importA/keepA"): graph_fn(use_resource=use_resource) - if use_resource: - # Bringing in collections that contain ResourceVariables will adds ops - # to the graph the first time a variable is encountered, so mimic the - # same behavior. 
- seen_variables = set() - for collection_key in sorted([ - ops.GraphKeys.GLOBAL_VARIABLES, - ops.GraphKeys.TRAINABLE_VARIABLES, - ]): - for var in expected_graph.get_collection(collection_key): - if var not in seen_variables: - var._read_variable_op() - seen_variables.add(var) - result = meta_graph.export_scoped_meta_graph(graph=imported_graph)[0] expected = meta_graph.export_scoped_meta_graph(graph=expected_graph)[0] diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 8503f3e031..71699fe0ad 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -277,6 +277,20 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.evaluate(v.assign(2.0)) self.assertEqual(2.0, self.evaluate(v.value())) + # Tests for the 'read_value' argument: + assign_with_read = v.assign(3.0, read_value=True) + if context.in_graph_mode(): + self.assertEqual(3.0, assign_with_read.eval()) + else: + self.assertEqual(3.0, self.evaluate(assign_with_read)) + assign_without_read = v.assign(4.0, read_value=False) + if context.in_graph_mode(): + self.assertIsInstance(assign_without_read, ops.Operation) + else: + self.assertIsNone(assign_without_read) + self.evaluate(assign_without_read) + self.assertEqual(4.0, self.evaluate(v.value())) + @test_util.run_in_graph_and_eager_modes() def testLoad(self): v = resource_variable_ops.ResourceVariable(1.0, name="var0") @@ -329,6 +343,9 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): w = resource_variable_ops.ResourceVariable.from_proto(v.to_proto()) self.assertEquals(2, math_ops.add(w, 1).eval()) + self.assertEquals(v._handle, w._handle) + self.assertEquals(v._graph_element, w._graph_element) + @test_util.run_in_graph_and_eager_modes() def testAssignAddMethod(self): v = resource_variable_ops.ResourceVariable(1.0, name="var0") @@ -336,6 +353,20 @@ class 
ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.evaluate(v.assign_add(1.0)) self.assertEqual(2.0, self.evaluate(v.value())) + # Tests for the 'read_value' argument: + assign_with_read = v.assign_add(1.0, read_value=True) + if context.in_graph_mode(): + self.assertEqual(3.0, assign_with_read.eval()) + else: + self.assertEqual(3.0, self.evaluate(assign_with_read)) + assign_without_read = v.assign_add(1.0, read_value=False) + if context.in_graph_mode(): + self.assertIsInstance(assign_without_read, ops.Operation) + else: + self.assertIsNone(assign_without_read) + self.evaluate(assign_without_read) + self.assertEqual(4.0, self.evaluate(v.value())) + @test_util.run_in_graph_and_eager_modes() def testAssignSubMethod(self): v = resource_variable_ops.ResourceVariable(3.0, name="var0") @@ -343,6 +374,20 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.evaluate(v.assign_sub(1.0)) self.assertEqual(2.0, self.evaluate(v.value())) + # Tests for the 'read_value' argument: + assign_with_read = v.assign_sub(1.0, read_value=True) + if context.in_graph_mode(): + self.assertEqual(1.0, assign_with_read.eval()) + else: + self.assertEqual(1.0, self.evaluate(assign_with_read)) + assign_without_read = v.assign_sub(1.0, read_value=False) + if context.in_graph_mode(): + self.assertIsInstance(assign_without_read, ops.Operation) + else: + self.assertIsNone(assign_without_read) + self.evaluate(assign_without_read) + self.assertEqual(0.0, self.evaluate(v.value())) + @test_util.run_in_graph_and_eager_modes() def testDestroyResource(self): v = resource_variable_ops.ResourceVariable(3.0, name="var0") diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 2d6d0672e0..bf186f1734 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -534,7 +534,8 @@ class ResourceVariable(variables.Variable): self._save_slice_info = None self._caching_device = None 
self._dtype = dtypes.as_dtype(self._handle.op.get_attr("dtype")) - self._graph_element = self.value() + self._graph_element = g.get_tensor_by_name( + self._handle.op.name + "/Read/ReadVariableOp:0") self._constraint = None def __nonzero__(self): @@ -788,20 +789,52 @@ class ResourceVariable(variables.Variable): __array_priority__ = 100 - def assign_sub(self, delta, use_locking=None, name=None): + def assign_sub(self, delta, use_locking=None, name=None, read_value=True): + """Subtracts a value from this variable. + + Args: + delta: A `Tensor`. The value to subtract from this variable. + use_locking: If `True`, use locking during the operation. + name: The name to use for the operation. + read_value: A `bool`. Whether to read and return the new value of the + variable or not. + + Returns: + If `read_value` is `True`, this method will return the new value of the + variable after the assignment has completed. Otherwise, when in graph mode + it will return the `Operation` that does the assignment, and when in eager + mode it will return `None`. + """ # TODO(apassos): this here and below is not atomic. Consider making it # atomic if there's a way to do so without a performance cost for those who # don't need it. - return self._lazy_read(gen_resource_variable_ops.assign_sub_variable_op( - self.handle, - ops.convert_to_tensor(delta, dtype=self.dtype), - name=name)) + assign_sub_op = gen_resource_variable_ops.assign_sub_variable_op( + self.handle, ops.convert_to_tensor(delta, dtype=self.dtype), name=name) + if read_value: + return self._lazy_read(assign_sub_op) + return assign_sub_op + + def assign_add(self, delta, use_locking=None, name=None, read_value=True): + """Adds a value to this variable. + + Args: + delta: A `Tensor`. The value to add to this variable. + use_locking: If `True`, use locking during the operation. + name: The name to use for the operation. + read_value: A `bool`. Whether to read and return the new value of the + variable or not. 
- def assign_add(self, delta, use_locking=None, name=None): - return self._lazy_read(gen_resource_variable_ops.assign_add_variable_op( - self.handle, - ops.convert_to_tensor(delta, dtype=self.dtype), - name=name)) + Returns: + If `read_value` is `True`, this method will return the new value of the + variable after the assignment has completed. Otherwise, when in graph mode + it will return the `Operation` that does the assignment, and when in eager + mode it will return `None`. + """ + assign_add_op = gen_resource_variable_ops.assign_add_variable_op( + self.handle, ops.convert_to_tensor(delta, dtype=self.dtype), name=name) + if read_value: + return self._lazy_read(assign_add_op) + return assign_add_op def _lazy_read(self, op): if hasattr(self, "_trainable") and self._trainable: @@ -811,14 +844,29 @@ class ResourceVariable(variables.Variable): self._in_graph_mode, self._handle_deleter if not self._in_graph_mode else None, op) - def assign(self, value, use_locking=None, name=None): + def assign(self, value, use_locking=None, name=None, read_value=True): + """Assigns a new value to this variable. + + Args: + value: A `Tensor`. The new value for this variable. + use_locking: If `True`, use locking during the assignment. + name: The name to use for the assignment. + read_value: A `bool`. Whether to read and return the new value of the + variable or not. + + Returns: + If `read_value` is `True`, this method will return the new value of the + variable after the assignment has completed. Otherwise, when in graph mode + it will return the `Operation` that does the assignment, and when in eager + mode it will return `None`. 
+ """ value_tensor = ops.convert_to_tensor(value, dtype=self.dtype) self._shape.assert_is_compatible_with(value_tensor.shape) - return self._lazy_read( - gen_resource_variable_ops.assign_variable_op( - self.handle, - value_tensor, - name=name)) + assign_op = gen_resource_variable_ops.assign_variable_op( + self.handle, value_tensor, name=name) + if read_value: + return self._lazy_read(assign_op) + return assign_op def _strided_slice_assign(self, begin, end, strides, value, name, begin_mask, end_mask, ellipsis_mask, new_axis_mask, diff --git a/tensorflow/python/training/checkpoint_utils.py b/tensorflow/python/training/checkpoint_utils.py index 0af1cdecfa..52d092bc22 100644 --- a/tensorflow/python/training/checkpoint_utils.py +++ b/tensorflow/python/training/checkpoint_utils.py @@ -23,6 +23,7 @@ import six from tensorflow.python import pywrap_tensorflow from tensorflow.python.framework import ops from tensorflow.python.ops import io_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables @@ -289,10 +290,14 @@ def _set_checkpoint_initializer(variable, name: Name of the operation. 
""" base_type = variable.dtype.base_dtype - with ops.colocate_with(variable): + with ops.colocate_with(variable.op): restore_op = io_ops.restore_v2( ckpt_file, [tensor_name], [slice_spec], [base_type], name=name)[0] - variable._initializer_op = state_ops.assign(variable, restore_op) # pylint:disable=protected-access + if isinstance(variable, resource_variable_ops.ResourceVariable): + init_op = variable.assign(restore_op, read_value=False) + else: + init_op = state_ops.assign(variable, restore_op) + variable._initializer_op = init_op # pylint:disable=protected-access restore_op.set_shape(variable.shape) variable._initial_value = restore_op # pylint:disable=protected-access diff --git a/tensorflow/python/training/checkpoint_utils_test.py b/tensorflow/python/training/checkpoint_utils_test.py index a461b24cbb..640bd665cb 100644 --- a/tensorflow/python/training/checkpoint_utils_test.py +++ b/tensorflow/python/training/checkpoint_utils_test.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -362,6 +363,31 @@ class CheckpointsTest(test.TestCase): checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"useful_scope": "some_scope/"}) + def testNoAdditionalReadOpsForResourceVariables(self): + checkpoint_dir = self.get_temp_dir() + with self.test_session() as session: + v1, _, _, _ = _create_checkpoints(session, checkpoint_dir) + + # New graph and session. 
+ with ops.Graph().as_default() as g: + with self.test_session(graph=g) as session: + my1 = resource_variable_ops.ResourceVariable([[0.0] * 10], name="my1") + + with ops.name_scope("init_from_checkpoint"): + checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"var1": my1}) + + # Basic sanity checks: + session.run(variables.global_variables_initializer()) + self.assertAllEqual(session.run(my1), v1) + + ops_in_init_from_checkpoint_scope = [ + op for op in g.get_operations() + if (op.name.startswith("init_from_checkpoint/") and + not op.name.startswith("init_from_checkpoint/checkpoint_initializer" + ) and op.type != "AssignVariableOp") + ] + self.assertEqual(ops_in_init_from_checkpoint_scope, []) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index e8ea5abfbd..6c80562968 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -584,7 +584,10 @@ class BaseSaverBuilder(object): else: if context.in_graph_mode(): if convert_variable_to_tensor: - var = ops.internal_convert_to_tensor(var, as_ref=True) + if isinstance(var, resource_variable_ops.ResourceVariable): + var = var._graph_element # pylint: disable=protected-access + else: + var = ops.internal_convert_to_tensor(var, as_ref=True) if not BaseSaverBuilder._IsVariable(var): raise TypeError("Variable to save is not a Variable: %s" % var) if var.op.type == "ReadVariableOp": @@ -674,7 +677,10 @@ class BaseSaverBuilder(object): "mode is enabled, type: %s." 
% type(op)) saveable = BaseSaverBuilder.ResourceVariableSaveable(op, "", name) else: - variable = ops.internal_convert_to_tensor(op, as_ref=True) + if isinstance(op, resource_variable_ops.ResourceVariable): + variable = op._graph_element # pylint: disable=protected-access + else: + variable = ops.internal_convert_to_tensor(op, as_ref=True) if not BaseSaverBuilder._IsVariable(variable): raise TypeError("names_to_saveables must be a dict mapping string " "names to Tensors/Variables. Not a variable: %s" % diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index b758ceaab0..7947765449 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -262,6 +262,24 @@ class SaverTest(test.TestCase): save2.restore(sess, save_path) self.assertEquals(self.evaluate(v), [1]) + def testNoAdditionalOpsAddedBySaverForResourceVariablesOutsideSaveScope(self): + with ops_lib.Graph().as_default() as g: + v = resource_variable_ops.ResourceVariable(1.0, name="v") + with ops_lib.name_scope("saver1"): + saver_module.Saver() + with ops_lib.name_scope("saver2"): + saver_module.Saver({"name": v}) + ops_in_saver1_scope_but_not_save_scope = [ + op for op in g.get_operations() + if (op.name.startswith("saver1/") and + not op.name.startswith("saver1/save/"))] + self.assertEqual(ops_in_saver1_scope_but_not_save_scope, []) + ops_in_saver2_scope_but_not_save_scope = [ + op for op in g.get_operations() + if (op.name.startswith("saver2/") and + not op.name.startswith("saver2/save/"))] + self.assertEqual(ops_in_saver2_scope_but_not_save_scope, []) + def testSaveCopyRestoreWithSaveRelativePaths(self): """Save, copy checkpoint dir and restore from copied dir. 
-- GitLab From 84fe908258550e1ce27e8725de1e2af279479c9d Mon Sep 17 00:00:00 2001 From: Minmin Sun Date: Sat, 3 Mar 2018 00:26:31 +0800 Subject: [PATCH 252/311] =?UTF-8?q?Add=20LINM=20(Loop=20Invariant=20Node?= =?UTF-8?q?=20Motion)=20optimization=20pass=20in=20GraphOptim=E2=80=A6=20(?= =?UTF-8?q?#16306)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add Loop Invariant Node Motion optimization in grappler * linm: disable loop optimizations by default, remove includes not needed from loop_optimizer_test.cc * remove redundant lines after merging with master * LINM: a minor change in BUILD to fix gen_ci_sanity_out failure, and remove 'No newline at end of file' warning --- tensorflow/core/grappler/optimizers/BUILD | 2 + .../grappler/optimizers/loop_optimizer.cc | 382 +++++++++++++- .../core/grappler/optimizers/loop_optimizer.h | 26 + .../optimizers/loop_optimizer_test.cc | 489 +++++++++++++++++- .../grappler/optimizers/meta_optimizer.cc | 8 +- 5 files changed, 901 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index a52d1c8df2..0a72a68a66 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -480,6 +480,7 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ + ":constant_folding", ":graph_optimizer", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -489,6 +490,7 @@ cc_library( "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/costs:graph_properties", + "//tensorflow/core/grappler/utils:frame", ], ) diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index 102526e22f..0223930d74 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -15,23 +15,403 @@ limitations under the 
License. #include "tensorflow/core/grappler/optimizers/loop_optimizer.h" +#include +#include #include #include +#include +#include +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/utils/frame.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/tensor_coding.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/util/saved_tensor_slice_util.h" + +using tensorflow::strings::StrCat; namespace tensorflow { namespace grappler { +Status LoopOptimizer::LINMHandleInvariantEnter(NodeDef* node, + const int num_outputs) { + auto consumers = node_map_->GetOutputs(node->name()); + std::vector enter_control_inputs; + string enter_input; + for (auto& input : node->input()) { + if (IsControlInput(input)) { + enter_control_inputs.push_back(input); + } else { + enter_input = input; + } + } + for (auto* consumer : consumers) { + if (invariant_nodes_.count(consumer)) { + for (int i = 0; i < consumer->input_size(); ++i) { + if (NodeName(consumer->input(i)) == node->name()) { + consumer->set_input(i, enter_input); + node_map_->AddOutput(NodeName(enter_input), consumer->name()); + node_map_->RemoveOutput(node->name(), consumer->name()); + } + } + for (auto& control_input : enter_control_inputs) { + consumer->add_input(control_input); + node_map_->AddOutput(NodeName(control_input), consumer->name()); + } + } + } + return Status::OK(); 
+} + +Status LoopOptimizer::LINMHandleConst(NodeDef* node, + const int num_outputs, const int frame_id) { + NodeDef* const_node; + if (num_outputs == 0) { + // all successor nodes are invariant + // Remove the control inputs from this frame to the const node, + // when moving it out of the frame (in parent frame) + const_node = node; + node_map_->RemoveInputs(node->name()); + node->clear_input(); + } else { + // some successor nodes are variant + // Have to keep the const node in the frame, + // so create a new one outside the frame (in parent frame) + const_node = optimized_graph_->add_node(); + const_node->set_name(AddPrefixToNodeName(node->name(), kLoopOptimizer)); + const_node->set_op("Const"); + const_node->set_device(node->device()); + *const_node->mutable_attr() = node->attr(); + node_map_->AddNode(const_node->name(), const_node); + auto consumers = node_map_->GetOutputs(node->name()); + for (auto* consumer : consumers) { + if (invariant_nodes_.count(consumer)) { + for (int i = 0; i < consumer->input_size(); ++i) { + if (NodeName(consumer->input(i)) == node->name()) { + if (IsControlInput(consumer->input(i))) { + *consumer->mutable_input(i) = AsControlDependency(*const_node); + } else { + *consumer->mutable_input(i) = const_node->name(); + } + node_map_->AddOutput(const_node->name(), consumer->name()); + node_map_->RemoveOutput(node->name(), consumer->name()); + } + } + } + } + } + // add a control input from the parent frame + auto parent_it = frame_parent_.find(frame_id); + if (parent_it != frame_parent_.end()) { + int parent_id = parent_it->second; + auto loop_cond_it = loop_cond_.find(parent_id); + if (loop_cond_it == loop_cond_.end()) { + return errors::InvalidArgument( + "Frame ", frame_id, " doesn't have a LoopCond node"); + } + auto& loop_cond_name = loop_cond_it->second->name(); + NodeDef* switch_node = nullptr; + for (auto* node : node_map_->GetOutputs(loop_cond_name)) { + if (node->op() == "Switch") { + switch_node = node; + break; + } + } + if 
(!switch_node) { + return errors::InvalidArgument( + "LoopCond node of Frame ", frame_id, + " doesn't connect to any Switch node"); + } + string switch_output = StrCat(switch_node->name(), ":1"); + const string ctrl_dep = ConstantFolding::AddControlDependency( + switch_output, optimized_graph_, node_map_.get()); + const_node->add_input(ctrl_dep); + node_map_->AddOutput(NodeName(ctrl_dep), const_node->name()); + } + return Status::OK(); +} + +Status LoopOptimizer::LINMHandleInvariantNode(NodeDef* node, + const int num_outputs, const int frame_id) { + // have to remove control inputs to the invariant node from the same frame + // when moving this node out of this frame + for (int i = 0; i < node->input_size(); ++i) { + if (IsControlInput(node->input(i))) { + node->mutable_input()->SwapElements(i, node->input_size() - 1); + node->mutable_input()->RemoveLast(); + } + } + if (num_outputs == 0) { + return Status::OK(); + } + + DataTypeVector input_types; + DataTypeVector output_types; + OpRegistryInterface* op_registry = OpRegistry::Global(); + const OpRegistrationData* op_reg_data = nullptr; + TF_RETURN_IF_ERROR( + op_registry->LookUp(node->op(), &op_reg_data)); + TF_RETURN_IF_ERROR( + InOutTypesForNode(*node, op_reg_data->op_def, + &input_types, &output_types)); + + auto consumers = node_map_->GetOutputs(node->name()); + string fname = invariant_enters_[frame_id][0]->attr().at("frame_name").s(); + int piterations = invariant_enters_[frame_id][0] + ->attr().at("parallel_iterations").i(); + for (auto* consumer : consumers) { + if (!invariant_nodes_.count(consumer)) { + for (int i = 0; i < consumer->input_size(); ++i) { + int port; + string node_name = ParseNodeName(consumer->input(i), &port); + if (node_name != node->name()) { + continue; + } + if (port < 0) { + return errors::InvalidArgument( + "Invariant node should not have control outputs " + "to variant node"); + } + DataType output_type = output_types[port]; + NodeDef* new_enter = optimized_graph_->add_node(); + 
new_enter->set_op("Enter"); + new_enter->set_device(node->device()); + new_enter->set_name(AddPrefixToNodeName( + StrCat(fname, "_enter_", new_enter_id_++), kLoopOptimizer)); + AttrValue data_type; + data_type.set_type(output_type); + new_enter->mutable_attr()->insert({"T", data_type}); + AttrValue frame_name; + frame_name.set_s(fname); + new_enter->mutable_attr()->insert({"frame_name", frame_name}); + AttrValue is_const; + is_const.set_b(true); + new_enter->mutable_attr()->insert({"is_constant", is_const}); + AttrValue parallel_iterations; + parallel_iterations.set_i(piterations); + new_enter->mutable_attr()->insert( + {"parallel_iterations", parallel_iterations}); + new_enter->add_input(consumer->input(i)); + *consumer->mutable_input(i) = new_enter->name(); + node_map_->AddNode(new_enter->name(), new_enter); + node_map_->AddOutput(node->name(), new_enter->name()); + node_map_->AddOutput(new_enter->name(), consumer->name()); + } + } + } + return Status::OK(); +} + +Status LoopOptimizer::MoveInvariantNodes(const int frame_id) { + for (auto iter = invariant_nodes_.begin(); + iter != invariant_nodes_.end(); ++iter) { + auto* invariant_node = iter->first; + const int num_outputs = iter->second; + if (IsEnter(*invariant_node)) { + TF_RETURN_IF_ERROR( + LINMHandleInvariantEnter(invariant_node, num_outputs)); + } else if (IsConstant(*invariant_node)) { + TF_RETURN_IF_ERROR( + LINMHandleConst(invariant_node, num_outputs, frame_id)); + } else { + TF_RETURN_IF_ERROR( + LINMHandleInvariantNode(invariant_node, num_outputs, frame_id)); + } + } + return Status::OK(); +} + +Status LoopOptimizer::RevertInvariantNodes() { + std::deque reverted_nodes; + for (auto iter=invariant_nodes_.begin(); iter != invariant_nodes_.end();) { + bool erased = false; + const auto* node = iter->first; + if (!IsConstant(*node) && !IsEnter(*node) && iter->second > 0) { + auto& consumers = node_map_->GetOutputs(node->name()); + for (auto* consumer : consumers) { + if (!invariant_nodes_.count(consumer)) 
{ + for (const auto& input : consumer->input()) { + if (IsControlInput(input) && NodeName(input) == node->name()) { + reverted_nodes.push_back(node); + invariant_nodes_.erase(iter++); + erased = true; + break; + } + } + if (erased) break; + } + } + } + if (!erased) ++iter; + } + while (!reverted_nodes.empty()) { + const auto* node = reverted_nodes.front(); + reverted_nodes.pop_front(); + std::set producers; + for (const auto& input : node->input()) { + auto* producer = node_map_->GetNode(input); + auto iter = invariant_nodes_.find(producer); + if (iter != invariant_nodes_.end()) { + if (IsControlInput(input) && + !IsConstant(*producer) && !IsEnter(*producer)) { + reverted_nodes.push_back(producer); + invariant_nodes_.erase(iter); + } else { + producers.insert(producer); + } + } + } + for (auto* producer : producers) { + auto iter = invariant_nodes_.find(producer); + if (iter != invariant_nodes_.end()) { + ++iter->second; + } + } + for (auto* consumer : node_map_->GetOutputs(node->name())) { + auto iter = invariant_nodes_.find(consumer); + if (iter != invariant_nodes_.end()) { + reverted_nodes.push_back(consumer); + invariant_nodes_.erase(iter); + } + } + } + return Status::OK(); +} + +Status LoopOptimizer::FindInvariantNodes(NodeDef* node) { + auto consumers = node_map_->GetOutputs(node->name()); + invariant_nodes_.insert(std::make_pair(node, consumers.size())); + for (auto* consumer : consumers) { + if (invariant_nodes_.count(consumer) || + ModifiesFrameInfo(*consumer)) { + continue; + } + bool is_invariant = true; + for (const auto& input : consumer->input()) { + if (!IsControlInput(input)) { + const auto& name = NodeName(input); + auto* producer = node_map_->GetNode(name); + if (!invariant_nodes_.count(producer)) { + if (IsConstant(*producer)) { + invariant_nodes_.insert( + std::make_pair(producer, node_map_->GetOutputs(name).size())); + } else { + is_invariant = false; + break; + } + } + } + } + if (is_invariant) { + std::set producers; + for (const auto& input 
: consumer->input()) { + auto* producer = node_map_->GetNode(input); + producers.insert(producer); + } + for (auto* producer : producers) { + auto iter = invariant_nodes_.find(producer); + if (iter != invariant_nodes_.end()) { + --iter->second; + } + } + TF_RETURN_IF_ERROR(FindInvariantNodes(consumer)); + } + } + return Status::OK(); +} + +Status LoopOptimizer::LoopInvariantNodeMotion() { + std::deque worklist; + for (auto iter = frame_map_.begin(); iter != frame_map_.end(); ++iter) { + auto* node = iter->first; + auto& frame_ids = iter->second; + if (frame_ids.size() >= 3) { + for (unsigned int i = 1; i < frame_ids.size() - 1; ++i) { + frame_parent_[frame_ids[i]] = frame_ids[i - 1]; + frame_children_[frame_ids[i]].insert(frame_ids[i + 1]); + } + } + if (frame_ids.size() >= 2) { + frame_children_[frame_ids[0]].insert(frame_ids[1]); + frame_parent_[frame_ids.back()] = frame_ids[frame_ids.size() - 2]; + } + if (frame_ids.size() >= 1) { + frame_children_.insert(std::make_pair(frame_ids.back(), empty_set_)); + if (node->op() == "LoopCond") { + if (loop_cond_.count(frame_ids.back())) { + return errors::InvalidArgument( + "Loop ", frame_ids.back(), + " has more than one LoopCond node: ", node->name(), " and ", + loop_cond_[frame_ids.back()]->name()); + } + loop_cond_[frame_ids.back()] = node; + } + if (IsEnter(*node) && node->attr().at("is_constant").b()) { + invariant_enters_[frame_ids.back()].push_back( + const_cast(node)); + } + } + } + + for (auto it = frame_children_.begin(); it != frame_children_.end(); ++it) { + if (it->second.size() == 0) { + worklist.push_back(it->first); + } + } + + while (!worklist.empty()) { + int frame_id = worklist.front(); + new_enter_id_ = 0; + worklist.pop_front(); + auto parent_it = frame_parent_.find(frame_id); + if (parent_it != frame_parent_.end()) { + int parent_id = parent_it->second; + frame_children_[parent_id].erase(frame_id); + if (frame_children_[parent_id].size() == 0) { + worklist.push_back(parent_id); + } + } + + if 
(invariant_enters_[frame_id].empty()) { + continue; + } + invariant_nodes_.clear(); + for (auto* enter : invariant_enters_[frame_id]) { + TF_RETURN_IF_ERROR(FindInvariantNodes(enter)); + } + + // revert invariant nodes that have control outputs to variant nodes + TF_RETURN_IF_ERROR(RevertInvariantNodes()); + + TF_RETURN_IF_ERROR(MoveInvariantNodes(frame_id)); + } + return Status::OK(); +} + Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { - *optimized_graph = item.graph; + optimized_graph_ = optimized_graph; + *optimized_graph_ = item.graph; + + // Set up helper data structures. + node_map_.reset(new NodeMap(optimized_graph_)); + int num_frames; + TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph_, *node_map_, + &frame_map_, &num_frames)); + + TF_RETURN_IF_ERROR(LoopInvariantNodeMotion()); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.h b/tensorflow/core/grappler/optimizers/loop_optimizer.h index 106d4628ae..b5944cd30b 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.h +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.h @@ -17,13 +17,17 @@ limitations under the License. 
#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_LOOP_OPTIMIZER_H_ #include +#include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/utils/frame.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { namespace grappler { +constexpr char kLoopOptimizer[] = "LoopOptimizer"; + class LoopOptimizer : public GraphOptimizer { public: LoopOptimizer() : opt_level_(RewriterConfig::ON) {} @@ -40,7 +44,29 @@ class LoopOptimizer : public GraphOptimizer { const GraphDef& optimized_graph, double result) override; private: + Status LoopInvariantNodeMotion(); + Status FindInvariantNodes(NodeDef* node); + Status RevertInvariantNodes(); + Status MoveInvariantNodes(const int fname); + Status LINMHandleInvariantNode(NodeDef* node, const int num_outputs, + const int frame_id); + Status LINMHandleConst(NodeDef* node, const int num_outputs, + const int frame_id); + Status LINMHandleInvariantEnter(NodeDef* node, const int num_outputs); + + std::map invariant_nodes_; + std::set empty_set_; + std::map> frame_children_; + std::map frame_parent_; + std::map loop_cond_; + std::map> invariant_enters_; + int new_enter_id_; RewriterConfig::Toggle opt_level_; + + std::unique_ptr node_map_; + FrameMap frame_map_; + std::unique_ptr graph_properties_; + GraphDef* optimized_graph_; // Not owned. 
}; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc index c09434f609..cc0432c3ed 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc @@ -26,7 +26,494 @@ namespace tensorflow { namespace grappler { namespace { -class LoopOptimizerTest : public ::testing::Test {}; +class LoopOptimizerTest : public ::testing::Test { + protected: + static NodeDef CreateNode(const string& name, + const std::vector& inputs) { + return CreateNode(name, "Identity", "", false, 0, inputs); + } + static NodeDef CreateNode(const string& name, const string& op, + const std::vector& inputs) { + return CreateNode(name, op, "", false, 0, inputs); + } + static NodeDef CreateNode(const string& name, const string& op, + const string& frame, + const bool is_constant, + const int piterations, + const std::vector& inputs) { + NodeDef node; + node.set_name(name); + if (!op.empty()) { + node.set_op(op); + } + if (!frame.empty()) { + AttrValue frame_name; + frame_name.set_s(frame); + node.mutable_attr()->insert({"frame_name", frame_name}); + } + if (op == "Enter") { + AttrValue is_const; + is_const.set_b(is_constant); + node.mutable_attr()->insert({"is_constant", is_const}); + AttrValue parallel_iterations; + parallel_iterations.set_i(piterations); + node.mutable_attr()->insert( + {"parallel_iterations", parallel_iterations}); + } + AttrValue type; + type.set_type(DT_FLOAT); + node.mutable_attr()->insert({"T", type}); + for (const string& input : inputs) { + node.add_input(input); + } + return node; + } +}; + +TEST_F(LoopOptimizerTest, Basic) { + GraphDef graph; + *graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + 
*graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"VariantAdd", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"VariantAdd"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).back(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd")).back(), 0); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd")).back(), 0); +} + +TEST_F(LoopOptimizerTest, Const) { + GraphDef graph; + *graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", 
"while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode("Const", "Const", {"^Identity"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "Const"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"VariantAdd", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"VariantAdd"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).back(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("Const")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("Const")).back(), 0); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("Const")).size(), 0); +} + +TEST_F(LoopOptimizerTest, ControlOutput) { + 
GraphDef graph; + *graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode( + "Less", "Less", {"VariantAdd", "less/y", "^InvariantAdd"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"VariantAdd"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).back(), 0); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).back(), 0); +} + +TEST_F(LoopOptimizerTest, NestedLoop1) { + GraphDef graph; + *graph.add_node() = 
CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"Exit2"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + *graph.add_node() = CreateNode( + "InvariantEnter2", "Enter", "while/while/while_context", true, 1, + {"VariantAdd"}); + *graph.add_node() = CreateNode( + "InvariantAdd2", "Add", {"InvariantEnter2", "InvariantEnter2"}); + *graph.add_node() = CreateNode( + "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); + *graph.add_node() = CreateNode( + "VariantEnter2", "Enter", "while/while/while_context", false, 1, + {"VariantEnter"}); + *graph.add_node() = CreateNode( + "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); + *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); + *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); + *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); + *graph.add_node() = CreateNode("Identity2", {"Switch2:1"}); + *graph.add_node() = CreateNode( + "NextIteration2", 
"NextIteration", {"VariantAdd2"}); + *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).back(), 0); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 0); +} + +TEST_F(LoopOptimizerTest, NestedLoop2) { + GraphDef graph; + *graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = 
CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"Exit2"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + *graph.add_node() = CreateNode( + "InvariantEnter2", "Enter", "while/while/while_context", true, 1, + {"InvariantAdd"}); + *graph.add_node() = CreateNode( + "InvariantAdd2", "Add", {"InvariantEnter2", "InvariantEnter2"}); + *graph.add_node() = CreateNode( + "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); + *graph.add_node() = CreateNode( + "VariantEnter2", "Enter", "while/while/while_context", false, 1, + {"VariantEnter"}); + *graph.add_node() = CreateNode( + "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); + *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); + *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); + *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); + *graph.add_node() = CreateNode("Identity2", {"Switch2:1"}); + *graph.add_node() = CreateNode( + "NextIteration2", "NextIteration", {"VariantAdd2"}); + *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + 
EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).back(), 1); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).back(), 1); +} + +TEST_F(LoopOptimizerTest, NestedLoopConst1) { + GraphDef graph; + *graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"Exit2"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + *graph.add_node() = CreateNode( + "InvariantEnter2", "Enter", "while/while/while_context", true, 1, + {"VariantAdd"}); + *graph.add_node() = CreateNode("Const2", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode( + "InvariantAdd2", "Add", 
{"InvariantEnter2", "Const2"}); + *graph.add_node() = CreateNode( + "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); + *graph.add_node() = CreateNode( + "VariantEnter2", "Enter", "while/while/while_context", false, 1, + {"VariantEnter"}); + *graph.add_node() = CreateNode( + "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); + *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); + *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); + *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); + *graph.add_node() = CreateNode("Identity2", {"Switch2:1"}); + *graph.add_node() = CreateNode( + "NextIteration2", "NextIteration", {"VariantAdd2"}); + *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).back(), 1); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).back(), 0); +} + +TEST_F(LoopOptimizerTest, NestedLoopConst2) { + GraphDef graph; + 
*graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"Exit2"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + *graph.add_node() = CreateNode( + "InvariantEnter2", "Enter", "while/while/while_context", true, 1, + {"InvariantAdd"}); + *graph.add_node() = CreateNode("Const2", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode( + "InvariantAdd2", "Add", {"InvariantEnter2", "Const2"}); + *graph.add_node() = CreateNode( + "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); + *graph.add_node() = CreateNode( + "VariantEnter2", "Enter", "while/while/while_context", false, 1, + {"VariantEnter"}); + *graph.add_node() = CreateNode( + "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); + *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); + *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); + *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); + *graph.add_node() = CreateNode("Identity2", 
{"Switch2:1"}); + *graph.add_node() = CreateNode( + "NextIteration2", "NextIteration", {"VariantAdd2"}); + *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).back(), 1); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).size(), 0); +} void VerifyGraphsEqual(const GraphDef& original_graph, const GraphDef& optimized_graph, const string& func) { diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 7ae77207af..39ecf017db 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -98,13 +98,13 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new ArithmeticOptimizer(cfg_.arithmetic_optimization()))); } - if (cfg_.dependency_optimization() != RewriterConfig::OFF) { + if (cfg_.loop_optimization() == RewriterConfig::ON) { optimizers.push_back(std::unique_ptr( - new DependencyOptimizer(cfg_.dependency_optimization()))); + new LoopOptimizer(cfg_.loop_optimization()))); } - if (cfg_.loop_optimization() != 
RewriterConfig::OFF) { + if (cfg_.dependency_optimization() != RewriterConfig::OFF) { optimizers.push_back(std::unique_ptr( - new LoopOptimizer(cfg_.loop_optimization()))); + new DependencyOptimizer(cfg_.dependency_optimization()))); } if (cfg_.layout_optimizer() != RewriterConfig::OFF) { optimizers.push_back( -- GitLab From 1534cf92b4710d29dea780b1a17a6f7d2f10fc7b Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 2 Mar 2018 08:31:21 -0800 Subject: [PATCH 253/311] Internal-only change. PiperOrigin-RevId: 187623121 --- tensorflow/contrib/tpu/python/tpu/datasets.py | 2 +- tensorflow/contrib/tpu/python/tpu/datasets_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py index 29aea98542..71a3a92540 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -116,7 +116,7 @@ def StreamingFilesDataset(files, file_reader_job = file_reader_job or 'coordinator' - worker_job = worker_job or 'worker' + worker_job = worker_job or 'tpu_worker' if filename_shuffle_buffer_size is None: filename_shuffle_buffer_size = 4096 diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py index 2c40797792..0173aac4f7 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -44,7 +44,7 @@ class DatasetsTest(test.TestCase): self._cluster_def = cluster_pb2.ClusterDef() worker_job = self._cluster_def.job.add() - worker_job.name = 'worker' + worker_job.name = 'tpu_worker' worker_job.tasks[0] = self._worker.target[len('grpc://'):] coord_job = self._cluster_def.job.add() coord_job.name = 'coordinator' -- GitLab From 4397f80b34d28144ed523a3f31a0fcbd1f3a9ba1 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 2 Mar 2018 08:45:01 -0800 Subject: [PATCH 254/311] Add a testing utility that can be called from compiled code, and which can mock a TF module for internal tests. Use it in api_test.py PiperOrigin-RevId: 187624343 --- tensorflow/contrib/py2tf/impl/api_test.py | 35 ++++++++++++---------- tensorflow/contrib/py2tf/utils/BUILD | 1 + tensorflow/contrib/py2tf/utils/__init__.py | 1 + tensorflow/contrib/py2tf/utils/testing.py | 35 ++++++++++++++++++++++ 4 files changed, 56 insertions(+), 16 deletions(-) create mode 100644 tensorflow/contrib/py2tf/utils/testing.py diff --git a/tensorflow/contrib/py2tf/impl/api_test.py b/tensorflow/contrib/py2tf/impl/api_test.py index 51e99864ad..13f8e66018 100644 --- a/tensorflow/contrib/py2tf/impl/api_test.py +++ b/tensorflow/contrib/py2tf/impl/api_test.py @@ -18,23 +18,26 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl import api from tensorflow.contrib.py2tf.impl import config from tensorflow.contrib.py2tf.pyct import parser from tensorflow.python.framework import constant_op -from tensorflow.python.ops import math_ops from tensorflow.python.platform import test +tf = utils.fake_tf() + + class ApiTest(test.TestCase): def setUp(self): - config.DEFAULT_UNCOMPILED_MODULES.add((math_ops.__name__,)) config.COMPILED_IMPORT_STATEMENTS = ( - 'from tensorflow.python.framework ' - 'import ops as tf', + 'from __future__ import print_function', 'from tensorflow.contrib.py2tf import utils as ' - 'py2tf_utils') + 'py2tf_utils', + 'tf = py2tf_utils.fake_tf()' + ) def test_decorator_recurses(self): @@ -47,7 +50,7 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -63,11 +66,11 @@ class ApiTest(test.TestCase): class TestClass(object): 
def called_member(self, a): - return math_ops.negative(a) + return tf.negative(a) @api.convert(recursive=False) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -84,11 +87,11 @@ class ApiTest(test.TestCase): @api.graph_ready def called_member(self, a): - return math_ops.negative(a) + return tf.negative(a) @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -111,7 +114,7 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -133,7 +136,7 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= api.convert_inline(self.called_member, a) return x @@ -149,11 +152,11 @@ class ApiTest(test.TestCase): class TestClass(object): def called_member(self, a): - return math_ops.negative(a) + return tf.negative(a) @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= api.graph_ready(self.called_member(a)) return x @@ -166,7 +169,7 @@ class ApiTest(test.TestCase): def test_to_graph_basic(self): def test_fn(x, s): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= 2 return x @@ -178,7 +181,7 @@ class ApiTest(test.TestCase): def test_to_code_basic(self): def test_fn(x, s): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x /= 2 return x diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index 2086a9ef60..63261d5043 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -26,6 +26,7 @@ py_library( "multiple_dispatch.py", "py_func.py", 
"tensor_list.py", + "testing.py", "type_check.py", ], srcs_version = "PY2AND3", diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index 19bf2272bc..313e5c97cc 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -25,4 +25,5 @@ from tensorflow.contrib.py2tf.utils.misc import alias_tensors from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_cond from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func +from tensorflow.contrib.py2tf.utils.testing import fake_tf from tensorflow.contrib.py2tf.utils.type_check import is_tensor diff --git a/tensorflow/contrib/py2tf/utils/testing.py b/tensorflow/contrib/py2tf/utils/testing.py new file mode 100644 index 0000000000..cb4785d0dc --- /dev/null +++ b/tensorflow/contrib/py2tf/utils/testing.py @@ -0,0 +1,35 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Testing utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import imp + +from tensorflow.python.framework import ops +from tensorflow.python.ops import math_ops + + +def fake_tf(): + """Creates a fake module that looks like TensorFlow, for testing.""" + mod = imp.new_module('tensorflow') + mod_contents = dict() + mod_contents.update(math_ops.__dict__) + mod_contents.update(ops.__dict__) + mod_contents.update(mod.__dict__) + mod.__dict__.update(mod_contents) + return mod -- GitLab From 7013a5ae241cd0c5375065f549aec27fcee6465d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 2 Mar 2018 09:24:26 -0800 Subject: [PATCH 255/311] Take into account the return value mapping of functions PiperOrigin-RevId: 187628382 --- .../grappler/optimizers/function_optimizer.cc | 6 +- .../optimizers/function_optimizer_test.cc | 156 +++++++++++++++++- tensorflow/core/grappler/utils/functions.cc | 17 +- .../core/grappler/utils/functions_test.cc | 85 +++++++++- 4 files changed, 256 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index a5cf00c155..167e5a153a 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -102,7 +102,8 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, func_outputs->set_op("IdentityN"); func_outputs->set_device(node.device()); type_list = (*func_outputs->mutable_attr())["T"].mutable_list(); - for (const OpDef::ArgDef& arg : func.signature().output_arg()) { + for (int i = 0; i < func.signature().output_arg_size(); ++i) { + const OpDef::ArgDef& arg = func.signature().output_arg(i); if (arg.type() != DT_INVALID) { type_list->add_type(arg.type()); } else { @@ -114,7 +115,8 @@ Status 
InlineFunction(const NodeDef& node, const FunctionDef& func, } type_list->add_type(it->second.type()); } - func_outputs->add_input(strings::StrCat(node.name(), "/", arg.name())); + // Use the fetch names since they take into account the output mapping. + func_outputs->add_input(strings::StrCat(node.name(), "/", item->fetch[i])); } return Status::OK(); diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index fd61c067ed..5072abaac7 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -79,7 +79,7 @@ TEST_F(FunctionOptimizerTest, SimpleFunction) { EXPECT_EQ("IdentityN", node.op()); EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("y/y", node.input(0)); + EXPECT_EQ("y/y:0", node.input(0)); } else if (node.name() == "z") { count++; EXPECT_EQ("Identity", node.op()); @@ -166,7 +166,7 @@ TEST_F(FunctionOptimizerTest, FixedTypeFunction) { EXPECT_EQ("IdentityN", node.op()); EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("y/y", node.input(0)); + EXPECT_EQ("y/y:0", node.input(0)); } else if (node.name() == "z") { count++; EXPECT_EQ("Identity", node.op()); @@ -187,6 +187,158 @@ TEST_F(FunctionOptimizerTest, FixedTypeFunction) { test::ExpectTensorEqual(tensors_expected[0], tensors[0]); } +TEST_F(FunctionOptimizerTest, FunctionWithOutputMapping) { + FunctionDef func = FunctionDefHelper::Create( + // Name + "Exp_func", + // Args + {"in: float"}, + // Return values + {"out: float"}, + // Attr def + {}, + // Nodes + {{{"Linear_func"}, "Identity", {"in"}, {{"T", DT_FLOAT}}}, + {{"Exp"}, "Exp", {"Linear_func:output:0"}, {{"T", DT_FLOAT}}}}, + // Mapping + {{"out", "Exp:y:0"}}); + + GrapplerItem item; + constexpr char device[] = "/device:CPU:0"; + item.graph = test::function::GDef( + {test::function::NDef("x", "Placeholder", {}, {{"dtype", 
DT_FLOAT}}, + device), + test::function::NDef("y", "Exp_func", {"x"}, {}, device), + test::function::NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, device)}, + // FunctionLib + { + func, + }); + + FunctionOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + int count = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "y/inlined_inputs") { + count++; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("x", node.input(0)); + } else if (node.name() == "y/in") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/inlined_inputs:0", node.input(0)); + } else if (node.name() == "y/Linear_func") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/in", node.input(0)); + } else if (node.name() == "y/Exp") { + count++; + EXPECT_EQ("Exp", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/Linear_func:0", node.input(0)); + } else if (node.name() == "y") { + count++; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/Exp:0", node.input(0)); + } else if (node.name() == "z") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y", node.input(0)); + } + } + EXPECT_EQ(6, count); + + item.fetch = {"z"}; + Tensor pi(DT_FLOAT, {}); + pi.flat()(0) = 3.14f; + item.feed.emplace_back("x", pi); + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +} + +TEST_F(FunctionOptimizerTest, FunctionWithInputForwarding) { + 
FunctionDef func = FunctionDefHelper::Create( + // Name + "ForwardInputs", + // Args + {"in0: float", "in1: float", "arg2: float", "arg3: int32", "arg4: float"}, + // Return values + {"out0: float", "arg2: float", "arg3: int32"}, + // Attr def + {}, + // Nodes + {}, + // Mapping + {{"out0", "in0"}, {"arg2", "arg2"}, {"arg3", "arg3"}}); + + GrapplerItem item; + constexpr char device[] = "/device:CPU:0"; + item.graph = test::function::GDef( + {test::function::NDef("x0", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("x1", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("x2", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("x3", "Placeholder", {}, {{"dtype", DT_INT32}}, + device), + test::function::NDef("x4", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("y", "ForwardInputs", + {"x0", "x1", "x2", "x3", "x4"}, {}, device), + test::function::NDef("z0", "Identity", {"y:0"}, {{"T", DT_FLOAT}}, + device), + test::function::NDef("z1", "Identity", {"y:1"}, {{"T", DT_FLOAT}}, + device), + test::function::NDef("z2", "Identity", {"y:2"}, {{"T", DT_INT32}}, + device)}, + // FunctionLib + { + func, + }); + + FunctionOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + item.fetch = {"z0", "z1", "z2"}; + Tensor in(DT_FLOAT, {}); + in.flat()(0) = 3.14f; + item.feed.emplace_back("x0", in); + in.flat()(0) = 2.7f; + item.feed.emplace_back("x1", in); + in.flat()(0) = 1.0f; + item.feed.emplace_back("x2", in); + in.flat()(0) = -1.0f; + item.feed.emplace_back("x4", in); + Tensor in_int(DT_INT32, {}); + in_int.flat()(0) = 1234; + item.feed.emplace_back("x3", in_int); + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); + 
test::ExpectTensorEqual(tensors_expected[1], tensors[1]); + test::ExpectTensorEqual(tensors_expected[2], tensors[2]); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc index 37b00e0a30..4f286ce1c8 100644 --- a/tensorflow/core/grappler/utils/functions.cc +++ b/tensorflow/core/grappler/utils/functions.cc @@ -124,9 +124,22 @@ std::unique_ptr GrapplerItemFromFunctionDef( } } - // Add the function outputs to the list of fetch nodes. + // Add the function outputs to the list of fetch nodes, taking into account + // the output mapping if any. for (const auto& out : func.signature().output_arg()) { - new_item->fetch.emplace_back(out.name()); + auto it = func.ret().find(out.name()); + if (it != func.ret().end()) { + auto it2 = port_map.find(it->second); + if (it2 == port_map.end()) { + LOG(ERROR) << "Unknown output mapping: " << it->first << " to " + << it->second; + return nullptr; + } else { + new_item->fetch.emplace_back(it2->second); + } + } else { + new_item->fetch.emplace_back(out.name()); + } } // Add the function inputs to the list of feeds. 
for (const auto& inp : func.signature().input_arg()) { diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc index 25ccb50084..25ec50d478 100644 --- a/tensorflow/core/grappler/utils/functions_test.cc +++ b/tensorflow/core/grappler/utils/functions_test.cc @@ -54,7 +54,7 @@ TEST_F(FunctionsTest, FromSimpleFunctionDef) { CHECK(item); EXPECT_EQ("XTimesTwo", item->id); EXPECT_EQ(4, item->graph.node_size()); - EXPECT_EQ(std::vector({"y"}), item->fetch); + EXPECT_EQ(std::vector({"y:0"}), item->fetch); EXPECT_EQ(1, item->feed.size()); EXPECT_EQ("x", item->feed[0].first); @@ -121,7 +121,7 @@ TEST_F(FunctionsTest, FromFunctionDefWithMultiOutputNodes) { CHECK(item); EXPECT_EQ("SubGrad", item->id); EXPECT_EQ(12, item->graph.node_size()); - EXPECT_EQ(std::vector({"dx", "dy"}), item->fetch); + EXPECT_EQ(std::vector({"dx:0", "dy:0"}), item->fetch); EXPECT_EQ(3, item->feed.size()); EXPECT_EQ("x", item->feed[0].first); EXPECT_EQ("y", item->feed[1].first); @@ -184,6 +184,7 @@ TEST_F(FunctionsTest, FromFunctionDefWithNestedFuncs) { {{"x2"}, "Mul", {"x", "x"}, {{"T", DT_FLOAT}}}, {{"y2"}, "Mul", {"y", "y"}, {{"T", DT_FLOAT}}, {"a1"}}, {{"o"}, "Add", {"x2:z:0", "y2:z:0"}, {{"T", DT_FLOAT}}}}, + // Output Mapping {{"o", "o:z:0"}}); std::unordered_map func_attr; @@ -227,6 +228,86 @@ TEST_F(FunctionsTest, FromFunctionDefWithNestedFuncs) { } } +TEST_F(FunctionsTest, FromFunctionDefWithOutputMappings) { + FunctionDef func = FunctionDefHelper::Create( + // Name + "Exp_func", + // Args + {"in: float"}, + // Return values + {"out: float"}, + // Attr def + {}, + // Nodes + {{{"Linear_func"}, "Identity", {"in"}, {{"T", DT_FLOAT}}}, + {{"Exp"}, "Exp", {"Linear_func:output:0"}, {{"T", DT_FLOAT}}}}, + // Mapping + {{"out", "Exp:y:0"}}); + + std::unordered_map func_attr; + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + + EXPECT_EQ(1, item->fetch.size()); + EXPECT_EQ("Exp:0", 
item->fetch[0]); + + for (const NodeDef &node : item->graph.node()) { + if (node.name() == "in") { + EXPECT_EQ("Placeholder", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "Linear_func") { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("in", node.input(0)); + } else if (node.name() == "Exp") { + EXPECT_EQ("Exp", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("Linear_func:0", node.input(0)); + } + } +} + +TEST_F(FunctionsTest, FromFunctionDefWithInputForwarding) { + FunctionDef func = FunctionDefHelper::Create( + // Name + "ForwardInputs", + // Args + {"in0: float", "in1: float", "arg2: float", "arg3: int32", "arg4: float"}, + // Return values + {"out0: float", "arg2: float", "arg3: int32"}, + // Attr def + {}, + // Nodes + {}, + // Mapping + {{"out0", "in0"}}); + + std::unordered_map func_attr; + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + + EXPECT_EQ(3, item->fetch.size()); + EXPECT_EQ("in0", item->fetch[0]); + EXPECT_EQ("arg2", item->fetch[1]); + EXPECT_EQ("arg3", item->fetch[2]); + + EXPECT_EQ(5, item->graph.node_size()); + for (const NodeDef &node : item->graph.node()) { + EXPECT_TRUE(node.name() == "in0" || node.name() == "in1" || + node.name() == "arg2" || node.name() == "arg3" || + node.name() == "arg4"); + EXPECT_EQ("Placeholder", node.op()); + if (node.name() == "arg3") { + EXPECT_EQ(DT_INT32, node.attr().at("T").type()); + } else { + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + } + } +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 96845a7c31aea72d44b4e16084ab5350896ca5c8 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 2 Mar 2018 09:32:36 -0800 Subject: [PATCH 256/311] Only use softfp for android builds to make odroid builds work. 
PiperOrigin-RevId: 187629282 --- tensorflow/contrib/lite/kernels/internal/BUILD | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index f47fb04cba..6ccad3b1ce 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -10,21 +10,25 @@ tflite_deps_intel = [ "@arm_neon_2_x86_sse", ] +HARD_FP_FLAGS_IF_APPLICABLE = select({ + "//tensorflow:android_arm": ["-mfloat-abi=softfp"], + "//tensorflow:android_arm64": ["-mfloat-abi=softfp"], + "//tensorflow:android_armeabi": ["-mfloat-abi=softfp"], + "//conditions:default": [], +}) + NEON_FLAGS_IF_APPLICABLE = select({ ":arm": [ "-O3", "-mfpu=neon", - "-mfloat-abi=softfp", ], ":armeabi-v7a": [ "-O3", "-mfpu=neon", - "-mfloat-abi=softfp", ], ":armv7a": [ "-O3", "-mfpu=neon", - "-mfloat-abi=softfp", ], "//conditions:default": [ "-O3", @@ -283,7 +287,7 @@ cc_library( "optimized/neon_tensor_utils.h", "optimized/tensor_utils_impl.h", ], - copts = NEON_FLAGS_IF_APPLICABLE, + copts = NEON_FLAGS_IF_APPLICABLE + HARD_FP_FLAGS_IF_APPLICABLE, deps = [ ":cpu_check", ":portable_tensor_utils", -- GitLab From cd810e21bdb0a5631836c69e5273135e4b15a441 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 2 Mar 2018 09:53:54 -0800 Subject: [PATCH 257/311] No need to override _handle_device for variables anymore PiperOrigin-RevId: 187631915 --- .../resource_variable_ops_test.py | 1 - .../python/ops/resource_variable_ops.py | 25 ++++++------------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 71699fe0ad..10ba9fa674 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -526,7 +526,6 @@ class 
ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.assertEqual(dtypes.int32, v.dtype) self.assertEqual("foo/var7:0", v.name) self.assertAllEqual([10, 20, 35], v.shape.as_list()) - self.assertEqual(context.get_default_context().device_name, v.device) self.assertTrue(isinstance(v.handle, ops.EagerTensor)) self.assertEqual(constraint, v.constraint) self.assertAllEqual(init.numpy(), v.read_value().numpy()) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index bf186f1734..cbac3c686d 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -384,9 +384,6 @@ class ResourceVariable(variables.Variable): shared_name=handle_name, name=name, graph_mode=self._in_graph_mode) - self._handle_device = ( - self._handle.device if self._in_graph_mode else - context.get_default_context().device_name) self._shape = initial_value.get_shape() else: initial_value = initial_value() @@ -399,9 +396,6 @@ class ResourceVariable(variables.Variable): shared_name=handle_name, name=name, graph_mode=False) - self._handle_device = ( - self._handle.device if self._in_graph_mode else - context.get_default_context().device_name) self._shape = initial_value.get_shape() # pylint: enable=protected-access @@ -425,8 +419,6 @@ class ResourceVariable(variables.Variable): shared_name=handle_name, name=name, graph_mode=self._in_graph_mode) - self._handle_device = (self._handle.device if self._in_graph_mode else - context.get_default_context().device_name) self._shape = initial_value.get_shape() self._initial_value = initial_value if self._in_graph_mode else None @@ -449,7 +441,7 @@ class ResourceVariable(variables.Variable): with ops.name_scope("Read"), ops.colocate_with(self._handle): # Manually assign reads to the handle's device to avoid log # messages. 
- with ops.device(self._handle_device): + with ops.device(self._handle.device): value = self._read_variable_op() self._graph_element = value if caching_device is not None: @@ -489,7 +481,7 @@ class ResourceVariable(variables.Variable): # cycles being uncollectable, and means that no __del__ will be defined at # all in graph mode. self._handle_deleter = EagerResourceDeleter( - handle=self._handle, handle_device=self._handle_device) + handle=self._handle, handle_device=self._handle.device) def _init_from_proto(self, variable_def, import_scope=None): """Initializes from `VariableDef` proto.""" @@ -507,7 +499,6 @@ class ResourceVariable(variables.Variable): variable_def.variable_name, import_scope=import_scope)) self._shape = tensor_shape.TensorShape( self._handle.op.get_attr("shape")) - self._handle_device = self._handle.device self._handle_name = self._handle.name self._initializer_op = g.as_graph_element( ops.prepend_name_scope( @@ -552,7 +543,7 @@ class ResourceVariable(variables.Variable): @property def device(self): """The device this variable is on.""" - return self._handle_device + return self._handle.device @property def graph(self): @@ -586,7 +577,7 @@ class ResourceVariable(variables.Variable): if self._cached_value is not None: return self._cached_value with ops.colocate_with(None, ignore_existing=True): - with ops.device(self._handle_device): + with ops.device(self._handle.device): return self._read_variable_op() def _as_graph_element(self): @@ -683,7 +674,7 @@ class ResourceVariable(variables.Variable): """ with ops.name_scope("Read"): # Ensure we read the variable in the same device as the handle. - with ops.device(self._handle_device): + with ops.device(self._handle.device): value = self._read_variable_op() # Return an identity so it can get placed on whatever device the context # specifies instead of the device where the variable is. 
@@ -840,8 +831,7 @@ class ResourceVariable(variables.Variable): if hasattr(self, "_trainable") and self._trainable: tape.watch_variable(self) return _UnreadVariable( - self._handle, self.dtype, self._handle_device, self._shape, - self._in_graph_mode, + self._handle, self.dtype, self._shape, self._in_graph_mode, self._handle_deleter if not self._in_graph_mode else None, op) def assign(self, value, use_locking=None, name=None, read_value=True): @@ -952,7 +942,7 @@ class _UnreadVariable(ResourceVariable): Pretends to be the tensor if anyone looks. """ - def __init__(self, handle, dtype, handle_device, # pylint: disable=super-init-not-called + def __init__(self, handle, dtype, # pylint: disable=super-init-not-called shape, in_graph_mode, deleter, parent_op): # We do not call super init on purpose. self._trainable = False @@ -960,7 +950,6 @@ class _UnreadVariable(ResourceVariable): self._graph_key = ops.get_default_graph()._graph_key # pylint: disable=protected-access self._in_graph_mode = in_graph_mode self._handle = handle - self._handle_device = handle_device self._shape = shape self._initial_value = None if isinstance(self._handle, ops.EagerTensor): -- GitLab From 929c435bcba105cf558e1942b63389812b62aff3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 10:05:14 -0800 Subject: [PATCH 258/311] Add bfloat16 support for CPU ops. 
PiperOrigin-RevId: 187633511 --- tensorflow/core/kernels/check_numerics_op.cc | 3 ++ tensorflow/core/kernels/cwise_op_add_1.cc | 8 +++--- tensorflow/core/kernels/cwise_op_isnan.cc | 3 +- tensorflow/core/kernels/cwise_op_mul_1.cc | 4 +-- tensorflow/core/kernels/cwise_op_square.cc | 4 +-- tensorflow/core/kernels/cwise_op_sub.cc | 4 +-- tensorflow/core/kernels/cwise_ops_common.h | 2 ++ tensorflow/core/kernels/training_ops.cc | 17 +++++++++++ tensorflow/core/lib/bfloat16/bfloat16.h | 30 ++++++++++++++++++++ 9 files changed, 64 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/kernels/check_numerics_op.cc b/tensorflow/core/kernels/check_numerics_op.cc index 6040b2b399..d3b67f4614 100644 --- a/tensorflow/core/kernels/check_numerics_op.cc +++ b/tensorflow/core/kernels/check_numerics_op.cc @@ -15,6 +15,8 @@ limitations under the License. // See docs in ../ops/array_ops.cc. +#include "tensorflow/core/lib/bfloat16/bfloat16.h" + #include #include #include @@ -219,6 +221,7 @@ class CheckNumericsOp : public AsyncOpKernel { Name("CheckNumerics").Device(DEVICE_CPU).TypeConstraint("T"), \ CheckNumericsOp); TF_CALL_half(REGISTER_CPU_KERNEL); +TF_CALL_bfloat16(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc index bf32c8a54b..9e4ffe950c 100644 --- a/tensorflow/core/kernels/cwise_op_add_1.cc +++ b/tensorflow/core/kernels/cwise_op_add_1.cc @@ -16,10 +16,10 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32, - int64); -REGISTER5(BinaryOp, CPU, "AddV2", functor::add, float, Eigen::half, double, - int32, int64); +REGISTER6(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32, + int64, bfloat16); +REGISTER6(BinaryOp, CPU, "AddV2", functor::add, float, Eigen::half, double, + int32, int64, bfloat16); #if GOOGLE_CUDA REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc index aa180c247e..707dc9e49c 100644 --- a/tensorflow/core/kernels/cwise_op_isnan.cc +++ b/tensorflow/core/kernels/cwise_op_isnan.cc @@ -16,7 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double); +REGISTER4(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double, + bfloat16); #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_mul_1.cc b/tensorflow/core/kernels/cwise_op_mul_1.cc index 0e8d2e3735..cff0407b83 100644 --- a/tensorflow/core/kernels/cwise_op_mul_1.cc +++ b/tensorflow/core/kernels/cwise_op_mul_1.cc @@ -17,8 +17,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER5(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double, uint8, - int32); +REGISTER6(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double, uint8, + int32, bfloat16); #if defined(__ANDROID_TYPES_SLIM__) // We only register the first type when we have multi-argument calls in the // case where we're trying to reduce executable size, but it turns out that the diff --git a/tensorflow/core/kernels/cwise_op_square.cc b/tensorflow/core/kernels/cwise_op_square.cc index 7fc2f6bf08..84f695ddc2 100644 --- a/tensorflow/core/kernels/cwise_op_square.cc +++ b/tensorflow/core/kernels/cwise_op_square.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER7(UnaryOp, CPU, "Square", functor::square, float, Eigen::half, double, - int32, int64, complex64, complex128); +REGISTER8(UnaryOp, CPU, "Square", functor::square, float, Eigen::half, double, + int32, int64, complex64, complex128, bfloat16); #if GOOGLE_CUDA REGISTER4(UnaryOp, GPU, "Square", functor::square, float, Eigen::half, double, diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc index 025041946a..eb27bddb78 100644 --- a/tensorflow/core/kernels/cwise_op_sub.cc +++ b/tensorflow/core/kernels/cwise_op_sub.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32, - int64, complex64, complex128); +REGISTER8(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32, + int64, bfloat16, complex64, complex128); #if !defined(__ANDROID_TYPES_SLIM__) // Sub op for int8, uint8, int16, uint16 REGISTER4(BinaryOp, CPU, "Sub", functor::sub, int8, uint8, int16, uint16); diff --git a/tensorflow/core/kernels/cwise_ops_common.h b/tensorflow/core/kernels/cwise_ops_common.h index 8295fa939e..e32eccf547 100644 --- a/tensorflow/core/kernels/cwise_ops_common.h +++ b/tensorflow/core/kernels/cwise_ops_common.h @@ -20,6 +20,8 @@ limitations under the License. #define EIGEN_USE_THREADS +#include "tensorflow/core/lib/bfloat16/bfloat16.h" + #ifdef TENSORFLOW_USE_SYCL #include "tensorflow/core/kernels/cwise_ops_sycl_common.h" #endif diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 233aa03c32..f53c567c4d 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -15,6 +15,8 @@ limitations under the License. 
#define EIGEN_USE_THREADS +#include "tensorflow/core/lib/bfloat16/bfloat16.h" + #include #include "tensorflow/core/framework/op_kernel.h" @@ -494,6 +496,7 @@ class ApplyGradientDescentOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -647,6 +650,7 @@ class ApplyAdadeltaOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -822,6 +826,7 @@ class SparseApplyAdadeltaOp : public OpKernel { REGISTER_KERNELS(T, int64); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -1107,6 +1112,7 @@ class ApplyAdagradOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -1360,6 +1366,7 @@ class SparseApplyAdagradOp : public OpKernel { REGISTER_KERNELS(T, int64); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -1961,6 +1968,7 @@ class ApplyFtrlOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -1982,6 +1990,7 @@ TF_CALL_double(REGISTER_CPU_KERNELS); #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); 
@@ -2230,6 +2239,7 @@ class SparseApplyFtrlOp : public OpKernel { REGISTER_KERNELS(T, int64); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -2254,6 +2264,7 @@ TF_CALL_double(REGISTER_CPU_KERNELS); REGISTER_KERNELS(T, int64); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -2332,6 +2343,7 @@ class ApplyMomentumOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -2471,6 +2483,7 @@ class SparseApplyMomentumOp : public OpKernel { REGISTER_KERNELS(T, int64); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -2698,6 +2711,7 @@ class ApplyAdamOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -2937,6 +2951,7 @@ class ApplyCenteredRMSPropOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -3352,6 +3367,7 @@ class ApplyAddSignOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -3457,6 +3473,7 @@ class ApplyPowerSignOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); 
+TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h index f9cca0ef2a..de8f92d1eb 100644 --- a/tensorflow/core/lib/bfloat16/bfloat16.h +++ b/tensorflow/core/lib/bfloat16/bfloat16.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_CORE_LIB_BFLOAT16_BFLOAT16_H_ #define TENSORFLOW_CORE_LIB_BFLOAT16_BFLOAT16_H_ +#include #include #ifdef __CUDACC__ @@ -271,6 +272,35 @@ struct hash { return hash()(static_cast(v)); } }; + +using tensorflow::bfloat16; +inline bool isinf(const bfloat16& a) { return std::isinf(float(a)); } +inline bool isnan(const bfloat16& a) { return std::isnan(float(a)); } +inline bool isfinite(const bfloat16& a) { return std::isfinite(float(a)); } +inline bfloat16 abs(const bfloat16& a) { return bfloat16(std::abs(float(a))); } +inline bfloat16 exp(const bfloat16& a) { return bfloat16(std::exp(float(a))); } +inline bfloat16 log(const bfloat16& a) { return bfloat16(std::log(float(a))); } +inline bfloat16 log10(const bfloat16& a) { + return bfloat16(std::log10(float(a))); +} +inline bfloat16 sqrt(const bfloat16& a) { + return bfloat16(std::sqrt(float(a))); +} +inline bfloat16 pow(const bfloat16& a, const bfloat16& b) { + return bfloat16(std::pow(float(a), float(b))); +} +inline bfloat16 sin(const bfloat16& a) { return bfloat16(std::sin(float(a))); } +inline bfloat16 cos(const bfloat16& a) { return bfloat16(std::cos(float(a))); } +inline bfloat16 tan(const bfloat16& a) { return bfloat16(std::tan(float(a))); } +inline bfloat16 tanh(const bfloat16& a) { + return bfloat16(std::tanh(float(a))); +} +inline bfloat16 floor(const bfloat16& a) { + return bfloat16(std::floor(float(a))); +} +inline bfloat16 ceil(const bfloat16& a) { + return bfloat16(std::ceil(float(a))); +} } // namespace std #endif // TENSORFLOW_CORE_LIB_BFLOAT16_BFLOAT16_H_ -- GitLab From 
3942fbfcc3252e2e479e3dde8d996e8e156558c4 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 2 Mar 2018 13:28:17 -0500 Subject: [PATCH 259/311] Disable loop_optimizer_test for now --- tensorflow/core/grappler/optimizers/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index b0a7587600..4e14f0ba40 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -538,6 +538,10 @@ tf_cc_test( name = "loop_optimizer_test", size = "small", srcs = ["loop_optimizer_test.cc"], + tags = [ + "manual", + "no_oss", + ], # b/74111495 deps = [ ":loop_optimizer", "//tensorflow/cc:cc_ops", -- GitLab From bce4f52b7201b943d544606dcca51ef4ba2b2c1a Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 2 Mar 2018 10:30:01 -0800 Subject: [PATCH 260/311] tf.keras: Remove unnecessary "with self.test_session()" statements in tests. The test decorator that runs the test twice (once with eager execution enabled, once without) doesn't require the block, and this makes the code appear more eager-friendly (as there is no concept of a session when eager execution is enabled).
PiperOrigin-RevId: 187637008 --- .../_impl/keras/model_subclassing_test.py | 245 +++++++++--------- 1 file changed, 117 insertions(+), 128 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py index 3d71a620fc..58b144365b 100644 --- a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py +++ b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py @@ -174,19 +174,18 @@ class ModelSubclassingTest(test.TestCase): num_samples = 100 input_dim = 50 - with self.test_session(): - model = SimpleTestModel(num_classes=num_classes, - use_dp=True, - use_bn=True) - model.compile(loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - metrics=['acc']) + model = SimpleTestModel(num_classes=num_classes, + use_dp=True, + use_bn=True) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) @test_util.run_in_graph_and_eager_modes() def test_multi_io_workflow_with_np_arrays(self): @@ -194,21 +193,20 @@ class ModelSubclassingTest(test.TestCase): num_samples = 1000 input_dim = 50 - with self.test_session(): - model = MultiIOTestModel(num_classes=num_classes, - use_dp=True, - use_bn=True) - model.compile(loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - metrics=['acc']) + model = MultiIOTestModel(num_classes=num_classes, + use_dp=True, + use_bn=True) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) - x1 = np.ones((num_samples, input_dim)) - x2 = np.ones((num_samples, input_dim)) - y1 = np.zeros((num_samples, 
num_classes[0])) - y2 = np.zeros((num_samples, num_classes[1])) + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) - _ = model.evaluate([x1, x2], [y1, y2], verbose=0) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + _ = model.evaluate([x1, x2], [y1, y2], verbose=0) def test_single_io_workflow_with_tensors(self): @@ -321,14 +319,13 @@ class ModelSubclassingTest(test.TestCase): x = np.ones((num_samples, input_dim)) y = np.ones((num_samples, input_dim)) - with self.test_session(): - model = BNNet() - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - y_ref = model.predict(x) + model = BNNet() + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + y_ref = model.predict(x) - model.train_on_batch(x, y) - y_new = model.predict(x) - self.assertGreater(np.sum(np.abs(y_ref - y_new)), 0.1) + model.train_on_batch(x, y) + y_new = model.predict(x) + self.assertGreater(np.sum(np.abs(y_ref - y_new)), 0.1) @test_util.run_in_graph_and_eager_modes() def test_training_and_inference_behavior(self): @@ -350,14 +347,13 @@ class ModelSubclassingTest(test.TestCase): x = self.dp(inputs) return self.dense(x) - with self.test_session(): - model = DPNet() - x = np.ones((num_samples, input_dim)) - y = model.predict(x) - self.assertEqual(np.sum(y), np.sum(x)) - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - loss = model.train_on_batch(x, y) - self.assertGreater(loss, 0.1) + model = DPNet() + x = np.ones((num_samples, input_dim)) + y = model.predict(x) + self.assertEqual(np.sum(y), np.sum(x)) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + loss = model.train_on_batch(x, y) + self.assertGreater(loss, 0.1) @test_util.run_in_graph_and_eager_modes() def test_training_methods(self): 
@@ -373,21 +369,20 @@ class ModelSubclassingTest(test.TestCase): y1 = np.zeros((num_samples, num_classes[0])) y2 = np.zeros((num_samples, num_classes[1])) - with self.test_session(): - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) - model.fit({'input_1': x1, 'input_2': x2}, - {'output_1': y1, 'output_2': y2}, - epochs=2, batch_size=32) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0, - validation_data=([x1, x2], [y1, y2])) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + model.fit({'input_1': x1, 'input_2': x2}, + {'output_1': y1, 'output_2': y2}, + epochs=2, batch_size=32) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0, + validation_data=([x1, x2], [y1, y2])) - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - model.train_on_batch([x1, x2], [y1, y2]) - model.train_on_batch({'input_1': x1, 'input_2': x2}, - {'output_1': y1, 'output_2': y2}) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.train_on_batch([x1, x2], [y1, y2]) + model.train_on_batch({'input_1': x1, 'input_2': x2}, + {'output_1': y1, 'output_2': y2}) @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def test_inference_methods(self): @@ -402,17 +397,16 @@ class ModelSubclassingTest(test.TestCase): y1 = np.zeros((num_samples, num_classes[0])) y2 = np.zeros((num_samples, num_classes[1])) - with self.test_session(): - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.compile(loss='mse', 
optimizer=RMSPropOptimizer(learning_rate=0.001)) - model.evaluate([x1, x2], [y1, y2]) - model.test_on_batch([x1, x2], [y1, y2]) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.evaluate([x1, x2], [y1, y2]) + model.test_on_batch([x1, x2], [y1, y2]) - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.predict([x1, x2]) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.predict([x1, x2]) - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.predict_on_batch([x1, x2]) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.predict_on_batch([x1, x2]) @test_util.run_in_graph_and_eager_modes() def test_trainable_mutation(self): @@ -435,26 +429,25 @@ class ModelSubclassingTest(test.TestCase): y1 = np.zeros((num_samples, num_classes[0])) y2 = np.zeros((num_samples, num_classes[1])) - with self.test_session(): - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) - y_ref_1, y_ref_2 = model.predict([x1, x2]) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + y_ref_1, y_ref_2 = model.predict([x1, x2]) - fd, fname = tempfile.mkstemp('.h5') - model.save_weights(fname) + fd, fname = tempfile.mkstemp('.h5') + model.save_weights(fname) - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - # need to build the model before loading weights - # (otherwise no weights to load) - model._set_inputs([x1, x2]) - model.load_weights(fname) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + # need to build the model before loading weights + # (otherwise no weights to load) + 
model._set_inputs([x1, x2]) + model.load_weights(fname) - y1, y2 = model.predict([x1, x2]) - self.assertAllClose(y_ref_1, y1, atol=1e-5) - self.assertAllClose(y_ref_2, y2, atol=1e-5) - os.close(fd) - os.remove(fname) + y1, y2 = model.predict([x1, x2]) + self.assertAllClose(y_ref_1, y1, atol=1e-5) + self.assertAllClose(y_ref_2, y2, atol=1e-5) + os.close(fd) + os.remove(fname) @test_util.run_in_graph_and_eager_modes() def test_summary(self): @@ -488,23 +481,22 @@ class ModelSubclassingTest(test.TestCase): num_samples = 100 input_dim = 50 - with self.test_session(): - model = NestedTestModel1(num_classes=num_classes) - model.compile(loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - metrics=['acc']) + model = NestedTestModel1(num_classes=num_classes) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) - self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) - self.assertEqual(len(model.non_trainable_weights), - 2 + len(model.test_net.non_trainable_weights)) - self.assertEqual(len(model.trainable_weights), - 6 + len(model.test_net.trainable_weights)) + self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) + self.assertEqual(len(model.non_trainable_weights), + 2 + len(model.test_net.non_trainable_weights)) + self.assertEqual(len(model.trainable_weights), + 6 + len(model.test_net.trainable_weights)) @test_util.run_in_graph_and_eager_modes() def test_graph_nested_in_subclass(self): @@ -512,23 +504,22 @@ class ModelSubclassingTest(test.TestCase): num_samples = 100 input_dim = 50 - with self.test_session(): - model = 
NestedTestModel2(num_classes=num_classes) - model.compile(loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - metrics=['acc']) + model = NestedTestModel2(num_classes=num_classes) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) - self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) - self.assertEqual(len(model.non_trainable_weights), - 2 + len(model.test_net.non_trainable_weights)) - self.assertEqual(len(model.trainable_weights), - 6 + len(model.test_net.trainable_weights)) + self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) + self.assertEqual(len(model.non_trainable_weights), + 2 + len(model.test_net.non_trainable_weights)) + self.assertEqual(len(model.trainable_weights), + 6 + len(model.test_net.trainable_weights)) @test_util.run_in_graph_and_eager_modes() def test_subclass_nested_in_graph(self): @@ -536,22 +527,21 @@ class ModelSubclassingTest(test.TestCase): num_samples = 100 input_dim = 50 - with self.test_session(): - model = get_nested_model_3(input_dim=input_dim, num_classes=num_classes) - model.compile(loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - metrics=['acc']) + model = get_nested_model_3(input_dim=input_dim, num_classes=num_classes) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) + 
model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) - self.assertEqual(len(model.weights), 16) - self.assertEqual( - len(model.non_trainable_weights), 4) - self.assertEqual(len(model.trainable_weights), 12) + self.assertEqual(len(model.weights), 16) + self.assertEqual( + len(model.non_trainable_weights), 4) + self.assertEqual(len(model.trainable_weights), 12) @test_util.run_in_graph_and_eager_modes() def test_support_for_manual_training_arg(self): @@ -575,14 +565,13 @@ class ModelSubclassingTest(test.TestCase): x = self.dp(inputs, training=training) return self.dense(x) - with self.test_session(): - model = DPNet() - x = np.ones((10, 10)) - y = model.predict(x) - self.assertEqual(np.sum(y), np.sum(x)) - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - loss = model.train_on_batch(x, y) - self.assertGreater(loss, 0.1) + model = DPNet() + x = np.ones((10, 10)) + y = model.predict(x) + self.assertEqual(np.sum(y), np.sum(x)) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + loss = model.train_on_batch(x, y) + self.assertGreater(loss, 0.1) if __name__ == '__main__': -- GitLab From b253460fd13dcfcf27eca610c5d397ef6ac980d2 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 2 Mar 2018 13:37:29 -0500 Subject: [PATCH 261/311] Fix formatting in grappler/optimizers/BUILD --- tensorflow/core/grappler/optimizers/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 4e14f0ba40..1381bfd18b 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -540,8 +540,8 @@ tf_cc_test( srcs = ["loop_optimizer_test.cc"], tags = [ "manual", - "no_oss", - ], # b/74111495 + "no_oss", # b/74111495 + ], deps = [ ":loop_optimizer", "//tensorflow/cc:cc_ops", -- GitLab From b5fa6af52198570a758d88b4bd64495353d8e7c6 Mon Sep 17 00:00:00 2001 From: 
"A. Unique TensorFlower" Date: Fri, 2 Mar 2018 11:11:15 -0800 Subject: [PATCH 262/311] Updating toolchain configs for GPU builds PiperOrigin-RevId: 187643585 --- tensorflow/tools/ci_build/Dockerfile.rbe.gpu | 26 + third_party/gpus/cuda/remote.BUILD.tpl | 26 +- third_party/toolchains/gpus/crosstool/BUILD | 5 + .../toolchains/gpus/crosstool/CROSSTOOL | 6 +- third_party/toolchains/gpus/cuda/BUILD | 2016 ++++++++--------- third_party/toolchains/gpus/py/BUILD | 171 ++ 6 files changed, 1186 insertions(+), 1064 deletions(-) create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.gpu create mode 100644 third_party/toolchains/gpus/py/BUILD diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.gpu b/tensorflow/tools/ci_build/Dockerfile.rbe.gpu new file mode 100644 index 0000000000..24ff4765a6 --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.gpu @@ -0,0 +1,26 @@ +FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 + +LABEL maintainer="Nick Lopez " + +# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to +# /usr/local/cuda +RUN cp -P /usr/include/cudnn.h /usr/local/cuda/include +RUN cp -P /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64 + +# Copy and run the install scripts. 
+COPY install/*.sh /install/ +ARG DEBIAN_FRONTEND=noninteractive +RUN /install/install_bootstrap_deb_packages.sh +RUN add-apt-repository -y ppa:openjdk-r/ppa && \ + add-apt-repository -y ppa:george-edison55/cmake-3.x +RUN /install/install_deb_packages.sh +RUN /install/install_pip_packages.sh +RUN /install/install_golang.sh + +# Install clang from pre-built package +RUN cd /tmp && \ + wget https://storage.googleapis.com/clang-builds-stable/clang-ubuntu16_04/clang_r323528.tar.gz && \ + echo "26752d9f5785df07193fac8316ba5d5ba3bec36d970c29a1577360848818ac74 clang_r323528.tar.gz" | sha256sum -c && \ + tar -C /usr/local -xf clang_r323528.tar.gz && \ + rm clang_r323528.tar.gz + diff --git a/third_party/gpus/cuda/remote.BUILD.tpl b/third_party/gpus/cuda/remote.BUILD.tpl index d88d512b90..f774def5e6 100644 --- a/third_party/gpus/cuda/remote.BUILD.tpl +++ b/third_party/gpus/cuda/remote.BUILD.tpl @@ -41,65 +41,65 @@ config_setting( alias( name = "cuda_headers", - actual = "%{remote_cuda_repo}cuda:cuda_headers", + actual = "%{remote_cuda_repo}/cuda:cuda_headers", ) alias( name = "cudart_static", - actual = "%{remote_cuda_repo}cuda:cudart_static", + actual = "%{remote_cuda_repo}/cuda:cudart_static", ) alias( name = "cuda_driver", - actual = "%{remote_cuda_repo}cuda:cuda_driver", + actual = "%{remote_cuda_repo}/cuda:cuda_driver", ) alias( name = "cudart", - actual = "%{remote_cuda_repo}cuda:cudart", + actual = "%{remote_cuda_repo}/cuda:cudart", ) alias( name = "cublas", - actual = "%{remote_cuda_repo}cuda:cublas", + actual = "%{remote_cuda_repo}/cuda:cublas", ) alias( name = "cusolver", - actual = "%{remote_cuda_repo}cuda:cusolver", + actual = "%{remote_cuda_repo}/cuda:cusolver", ) alias( name = "cudnn", - actual = "%{remote_cuda_repo}cuda:cudnn", + actual = "%{remote_cuda_repo}/cuda:cudnn", ) alias( name = "cufft", - actual = "%{remote_cuda_repo}cuda:cufft", + actual = "%{remote_cuda_repo}/cuda:cufft", ) alias( name = "curand", - actual = "%{remote_cuda_repo}cuda:curand", + 
actual = "%{remote_cuda_repo}/cuda:curand", ) alias( name = "cuda", - actual = "%{remote_cuda_repo}cuda:cuda", + actual = "%{remote_cuda_repo}/cuda:cuda", ) alias( name = "cupti_headers", - actual = "%{remote_cuda_repo}cuda:cupti_headers", + actual = "%{remote_cuda_repo}/cuda:cupti_headers", ) alias( name = "cupti_dsos", - actual = "%{remote_cuda_repo}cuda:cupti_dsos", + actual = "%{remote_cuda_repo}/cuda:cupti_dsos", ) alias( name = "libdevice_root", - actual = "%{remote_cuda_repo}cuda:libdevice_root", + actual = "%{remote_cuda_repo}/cuda:libdevice_root", ) diff --git a/third_party/toolchains/gpus/crosstool/BUILD b/third_party/toolchains/gpus/crosstool/BUILD index a8c6b0f029..1f9065007c 100644 --- a/third_party/toolchains/gpus/crosstool/BUILD +++ b/third_party/toolchains/gpus/crosstool/BUILD @@ -50,3 +50,8 @@ filegroup( name = "empty", srcs = [], ) + +filegroup( + name = "crosstool_wrapper_driver_is_not_gcc", + srcs = ["clang/bin/crosstool_wrapper_driver_is_not_gcc"], +) diff --git a/third_party/toolchains/gpus/crosstool/CROSSTOOL b/third_party/toolchains/gpus/crosstool/CROSSTOOL index 16ee2f82c6..d6ee7e38c4 100644 --- a/third_party/toolchains/gpus/crosstool/CROSSTOOL +++ b/third_party/toolchains/gpus/crosstool/CROSSTOOL @@ -144,8 +144,8 @@ toolchain { flag_group { # All warnings are enabled. Maybe enable -Werror as well? flag: "-Wall" - # TODO(ngiraldo): Some parts of the codebase set -Werror and hit this - # warning, so switch it off for now. + # Some parts of the codebase set -Werror and hit this warning, so + # switch it off for now. 
flag: "-Wno-invalid-partial-specialization" } } @@ -303,7 +303,7 @@ toolchain { cxx_builtin_include_directory: "/usr/include/x86_64-linux-gnu/c++/5.4.0" cxx_builtin_include_directory: "/usr/include/c++/5.4.0/backward" cxx_builtin_include_directory: "/usr/local/include" - cxx_builtin_include_directory: "/usr/local/lib/clang/6.0.0/include" + cxx_builtin_include_directory: "/usr/local/lib/clang/7.0.0/include" cxx_builtin_include_directory: "/usr/include/x86_64-linux-gnu" cxx_builtin_include_directory: "/usr/include" } diff --git a/third_party/toolchains/gpus/cuda/BUILD b/third_party/toolchains/gpus/cuda/BUILD index 39136de99c..cfc6930851 100644 --- a/third_party/toolchains/gpus/cuda/BUILD +++ b/third_party/toolchains/gpus/cuda/BUILD @@ -51,6 +51,7 @@ cc_library( includes = [ ".", "cuda/include", + "cuda/include/crt", ], visibility = ["//visibility:public"], ) @@ -84,8 +85,8 @@ cc_library( cc_library( name = "cudart", - srcs = ["cuda/lib/libcudart.so.8.0"], - data = ["cuda/lib/libcudart.so.8.0"], + srcs = ["cuda/lib/libcudart.so.9.0"], + data = ["cuda/lib/libcudart.so.9.0"], includes = [ ".", "cuda/include", @@ -96,8 +97,8 @@ cc_library( cc_library( name = "cublas", - srcs = ["cuda/lib/libcublas.so.8.0"], - data = ["cuda/lib/libcublas.so.8.0"], + srcs = ["cuda/lib/libcublas.so.9.0"], + data = ["cuda/lib/libcublas.so.9.0"], includes = [ ".", "cuda/include", @@ -108,8 +109,8 @@ cc_library( cc_library( name = "cusolver", - srcs = ["cuda/lib/libcusolver.so.8.0"], - data = ["cuda/lib/libcusolver.so.8.0"], + srcs = ["cuda/lib/libcusolver.so.9.0"], + data = ["cuda/lib/libcusolver.so.9.0"], includes = [ ".", "cuda/include", @@ -121,8 +122,8 @@ cc_library( cc_library( name = "cudnn", - srcs = ["cuda/lib/libcudnn.so.6"], - data = ["cuda/lib/libcudnn.so.6"], + srcs = ["cuda/lib/libcudnn.so.7"], + data = ["cuda/lib/libcudnn.so.7"], includes = [ ".", "cuda/include", @@ -133,8 +134,8 @@ cc_library( cc_library( name = "cufft", - srcs = ["cuda/lib/libcufft.so.8.0"], - data = 
["cuda/lib/libcufft.so.8.0"], + srcs = ["cuda/lib/libcufft.so.9.0"], + data = ["cuda/lib/libcufft.so.9.0"], includes = [ ".", "cuda/include", @@ -145,8 +146,8 @@ cc_library( cc_library( name = "curand", - srcs = ["cuda/lib/libcurand.so.8.0"], - data = ["cuda/lib/libcurand.so.8.0"], + srcs = ["cuda/lib/libcurand.so.9.0"], + data = ["cuda/lib/libcurand.so.9.0"], includes = [ ".", "cuda/include", @@ -183,7 +184,7 @@ cc_library( cc_library( name = "cupti_dsos", - data = ["cuda/lib/libcupti.so.8.0"], + data = ["cuda/lib/libcupti.so.9.0"], includes = [ ".", "cuda/include", @@ -200,1063 +201,990 @@ cc_library( genrule( name = "cuda-include", outs = [ - "cuda/include/math_functions.hpp", - "cuda/include/cufft.h", - "cuda/include/nvgraph.h", - "cuda/include/curand_normal.h", - "cuda/include/curand_uniform.h", - "cuda/include/nppi_data_exchange_and_initialization.h", - "cuda/include/cuda_gl_interop.h", - "cuda/include/nppi_compression_functions.h", - "cuda/include/npp.h", + "cuda/include/CL/cl.h", + "cuda/include/CL/cl.hpp", + "cuda/include/CL/cl_egl.h", + "cuda/include/CL/cl_ext.h", + "cuda/include/CL/cl_gl.h", + "cuda/include/CL/cl_gl_ext.h", + "cuda/include/CL/cl_platform.h", + "cuda/include/CL/opencl.h", + "cuda/include/builtin_types.h", + "cuda/include/channel_descriptor.h", + "cuda/include/common_functions.h", + "cuda/include/cooperative_groups.h", + "cuda/include/cooperative_groups_helpers.h", + "cuda/include/crt/common_functions.h", + "cuda/include/crt/device_double_functions.h", + "cuda/include/crt/device_double_functions.hpp", + "cuda/include/crt/device_functions.h", + "cuda/include/crt/device_functions.hpp", + "cuda/include/crt/func_macro.h", + "cuda/include/crt/host_config.h", + "cuda/include/crt/host_defines.h", + "cuda/include/crt/host_runtime.h", + "cuda/include/crt/math_functions.h", + "cuda/include/crt/math_functions.hpp", + "cuda/include/crt/mma.h", + "cuda/include/crt/mma.hpp", + "cuda/include/crt/nvfunctional", + "cuda/include/crt/sm_70_rt.h", + 
"cuda/include/crt/sm_70_rt.hpp", + "cuda/include/crt/storage_class.h", + "cuda/include/cuComplex.h", + "cuda/include/cublas.h", + "cuda/include/cublasXt.h", + "cuda/include/cublas_api.h", + "cuda/include/cublas_v2.h", "cuda/include/cuda.h", - "cuda/include/nppi_statistics_functions.h", - "cuda/include/vector_functions.hpp", - "cuda/include/sm_32_intrinsics.hpp", - "cuda/include/sm_32_intrinsics.h", - "cuda/include/curand_discrete.h", + "cuda/include/cudaEGL.h", + "cuda/include/cudaGL.h", + "cuda/include/cudaProfiler.h", + "cuda/include/cudaVDPAU.h", + "cuda/include/cuda_device_runtime_api.h", + "cuda/include/cuda_fp16.h", + "cuda/include/cuda_fp16.hpp", + "cuda/include/cuda_gl_interop.h", + "cuda/include/cuda_occupancy.h", + "cuda/include/cuda_profiler_api.h", "cuda/include/cuda_runtime.h", + "cuda/include/cuda_runtime_api.h", + "cuda/include/cuda_surface_types.h", + "cuda/include/cuda_texture_types.h", + "cuda/include/cuda_vdpau_interop.h", + "cuda/include/cudalibxt.h", + "cuda/include/cudnn.h", + "cuda/include/cufft.h", "cuda/include/cufftXt.h", - "cuda/include/sm_61_intrinsics.h", - "cuda/include/texture_fetch_functions.h", + "cuda/include/cufftw.h", + "cuda/include/curand.h", + "cuda/include/curand_discrete.h", + "cuda/include/curand_discrete2.h", + "cuda/include/curand_globals.h", + "cuda/include/curand_kernel.h", + "cuda/include/curand_lognormal.h", "cuda/include/curand_mrg32k3a.h", - "cuda/include/host_defines.h", - "cuda/include/common_functions.h", - "cuda/include/nppi_support_functions.h", - "cuda/include/nppi_linear_transforms.h", - "cuda/include/device_double_functions.hpp", - "cuda/include/math_constants.h", - "cuda/include/nvToolsExtSync.h", - "cuda/include/npps_initialization.h", + "cuda/include/curand_mtgp32.h", + "cuda/include/curand_mtgp32_host.h", + "cuda/include/curand_mtgp32_kernel.h", + "cuda/include/curand_mtgp32dc_p_11213.h", + "cuda/include/curand_normal.h", + "cuda/include/curand_normal_static.h", + "cuda/include/curand_philox4x32_x.h", + 
"cuda/include/curand_poisson.h", + "cuda/include/curand_precalc.h", + "cuda/include/curand_uniform.h", + "cuda/include/cusolverDn.h", + "cuda/include/cusolverRf.h", + "cuda/include/cusolverSp.h", "cuda/include/cusolverSp_LOWLEVEL_PREVIEW.h", - "cuda/include/texture_indirect_functions.hpp", - "cuda/include/cudaProfiler.h", - "cuda/include/npps_filtering_functions.h", + "cuda/include/cusolver_common.h", + "cuda/include/cusparse.h", "cuda/include/cusparse_v2.h", - "cuda/include/nppi.h", - "cuda/include/surface_indirect_functions.h", - "cuda/include/sm_30_intrinsics.h", + "cuda/include/device_atomic_functions.h", + "cuda/include/device_atomic_functions.hpp", "cuda/include/device_double_functions.h", - "cuda/include/sm_35_intrinsics.h", - "cuda/include/cusolverSp.h", - "cuda/include/library_types.h", - "cuda/include/surface_indirect_functions.hpp", - "cuda/include/cudalibxt.h", - "cuda/include/channel_descriptor.h", + "cuda/include/device_double_functions.hpp", + "cuda/include/device_functions.h", + "cuda/include/device_functions.hpp", "cuda/include/device_functions_decls.h", - "cuda/include/curand_kernel.h", - "cuda/include/curand_mtgp32_host.h", - "cuda/include/nvToolsExtCuda.h", - "cuda/include/nvToolsExt.h", - "cuda/include/cuComplex.h", - "cuda/include/sm_32_atomic_functions.h", - "cuda/include/texture_indirect_functions.h", - "cuda/include/sm_32_atomic_functions.hpp", - "cuda/include/sm_20_intrinsics.hpp", "cuda/include/device_launch_parameters.h", - "cuda/include/curand_mtgp32.h", - "cuda/include/texture_fetch_functions.hpp", - "cuda/include/cuda_occupancy.h", - "cuda/include/CL/opencl.h", - "cuda/include/CL/cl_platform.h", - "cuda/include/CL/cl_egl.h", - "cuda/include/CL/cl_gl.h", - "cuda/include/CL/cl.h", - "cuda/include/CL/cl_gl_ext.h", - "cuda/include/CL/cl_ext.h", - "cuda/include/CL/cl.hpp", + "cuda/include/device_types.h", + "cuda/include/driver_functions.h", + "cuda/include/driver_types.h", + "cuda/include/dynlink_cuda.h", + 
"cuda/include/dynlink_cuda_cuda.h", + "cuda/include/dynlink_cuviddec.h", + "cuda/include/dynlink_nvcuvid.h", + "cuda/include/fatBinaryCtl.h", + "cuda/include/fatbinary.h", "cuda/include/host_config.h", - "cuda/include/cuda_surface_types.h", + "cuda/include/host_defines.h", + "cuda/include/library_types.h", + "cuda/include/math_constants.h", "cuda/include/math_functions.h", + "cuda/include/math_functions.hpp", + "cuda/include/math_functions_dbl_ptx3.h", + "cuda/include/math_functions_dbl_ptx3.hpp", + "cuda/include/mma.h", + "cuda/include/npp.h", + "cuda/include/nppcore.h", + "cuda/include/nppdefs.h", + "cuda/include/nppi.h", + "cuda/include/nppi_arithmetic_and_logical_operations.h", + "cuda/include/nppi_color_conversion.h", + "cuda/include/nppi_compression_functions.h", + "cuda/include/nppi_computer_vision.h", + "cuda/include/nppi_data_exchange_and_initialization.h", + "cuda/include/nppi_filtering_functions.h", + "cuda/include/nppi_geometry_transforms.h", + "cuda/include/nppi_linear_transforms.h", + "cuda/include/nppi_morphological_operations.h", + "cuda/include/nppi_statistics_functions.h", + "cuda/include/nppi_support_functions.h", + "cuda/include/nppi_threshold_and_compare_operations.h", + "cuda/include/npps.h", + "cuda/include/npps_arithmetic_and_logical_operations.h", + "cuda/include/npps_conversion_functions.h", + "cuda/include/npps_filtering_functions.h", + "cuda/include/npps_initialization.h", + "cuda/include/npps_statistics_functions.h", + "cuda/include/npps_support_functions.h", + "cuda/include/nppversion.h", + "cuda/include/nvToolsExt.h", + "cuda/include/nvToolsExtCuda.h", + "cuda/include/nvToolsExtCudaRt.h", "cuda/include/nvToolsExtMeta.h", + "cuda/include/nvToolsExtSync.h", + "cuda/include/nvblas.h", + "cuda/include/nvfunctional", + "cuda/include/nvgraph.h", + "cuda/include/nvml.h", + "cuda/include/nvrtc.h", + "cuda/include/sm_20_atomic_functions.h", "cuda/include/sm_20_atomic_functions.hpp", - "cuda/include/device_functions.h", - 
"cuda/include/device_types.h", - "cuda/include/npps_conversion_functions.h", - "cuda/include/curand_precalc.h", - "cuda/include/cusolverRf.h", + "cuda/include/sm_20_intrinsics.h", + "cuda/include/sm_20_intrinsics.hpp", + "cuda/include/sm_30_intrinsics.h", + "cuda/include/sm_30_intrinsics.hpp", + "cuda/include/sm_32_atomic_functions.h", + "cuda/include/sm_32_atomic_functions.hpp", + "cuda/include/sm_32_intrinsics.h", + "cuda/include/sm_32_intrinsics.hpp", + "cuda/include/sm_35_atomic_functions.h", + "cuda/include/sm_35_intrinsics.h", + "cuda/include/sm_60_atomic_functions.h", "cuda/include/sm_60_atomic_functions.hpp", - "cuda/include/cuviddec.h", - "cuda/include/curand_discrete2.h", - "cuda/include/device_functions.hpp", - "cuda/include/thrust/transform_scan.h", - "cuda/include/thrust/system_error.h", - "cuda/include/thrust/device_malloc.h", - "cuda/include/thrust/partition.h", - "cuda/include/thrust/unique.h", - "cuda/include/thrust/device_delete.h", - "cuda/include/thrust/execution_policy.h", + "cuda/include/sm_61_intrinsics.h", + "cuda/include/sm_61_intrinsics.hpp", + "cuda/include/sobol_direction_vectors.h", + "cuda/include/surface_functions.h", + "cuda/include/surface_functions.hpp", + "cuda/include/surface_indirect_functions.h", + "cuda/include/surface_indirect_functions.hpp", + "cuda/include/surface_types.h", + "cuda/include/texture_fetch_functions.h", + "cuda/include/texture_fetch_functions.hpp", + "cuda/include/texture_indirect_functions.h", + "cuda/include/texture_indirect_functions.hpp", + "cuda/include/texture_types.h", "cuda/include/thrust/adjacent_difference.h", - "cuda/include/thrust/sequence.h", - "cuda/include/thrust/merge.h", - "cuda/include/thrust/device_new.h", - "cuda/include/thrust/transform_reduce.h", - "cuda/include/thrust/device_vector.h", - "cuda/include/thrust/gather.h", - "cuda/include/thrust/sort.h", - "cuda/include/thrust/scan.h", - "cuda/include/thrust/detail/temporary_array.h", - "cuda/include/thrust/detail/util/align.h", - 
"cuda/include/thrust/detail/util/blocking.h", - "cuda/include/thrust/detail/transform.inl", - "cuda/include/thrust/detail/device_vector.inl", + "cuda/include/thrust/advance.h", + "cuda/include/thrust/binary_search.h", + "cuda/include/thrust/complex.h", + "cuda/include/thrust/copy.h", + "cuda/include/thrust/count.h", + "cuda/include/thrust/detail/adjacent_difference.inl", + "cuda/include/thrust/detail/advance.inl", + "cuda/include/thrust/detail/allocator/allocator_traits.h", + "cuda/include/thrust/detail/allocator/allocator_traits.inl", + "cuda/include/thrust/detail/allocator/copy_construct_range.h", + "cuda/include/thrust/detail/allocator/copy_construct_range.inl", + "cuda/include/thrust/detail/allocator/default_construct_range.h", + "cuda/include/thrust/detail/allocator/default_construct_range.inl", + "cuda/include/thrust/detail/allocator/destroy_range.h", + "cuda/include/thrust/detail/allocator/destroy_range.inl", + "cuda/include/thrust/detail/allocator/fill_construct_range.h", + "cuda/include/thrust/detail/allocator/fill_construct_range.inl", + "cuda/include/thrust/detail/allocator/malloc_allocator.h", + "cuda/include/thrust/detail/allocator/malloc_allocator.inl", + "cuda/include/thrust/detail/allocator/no_throw_allocator.h", + "cuda/include/thrust/detail/allocator/tagged_allocator.h", + "cuda/include/thrust/detail/allocator/tagged_allocator.inl", + "cuda/include/thrust/detail/allocator/temporary_allocator.h", + "cuda/include/thrust/detail/allocator/temporary_allocator.inl", "cuda/include/thrust/detail/binary_search.inl", - "cuda/include/thrust/detail/overlapped_copy.h", - "cuda/include/thrust/detail/vector_base.inl", - "cuda/include/thrust/detail/device_reference.inl", - "cuda/include/thrust/detail/functional/actor.h", - "cuda/include/thrust/detail/functional/value.h", - "cuda/include/thrust/detail/functional/operators.h", - "cuda/include/thrust/detail/functional/operators/logical_operators.h", - 
"cuda/include/thrust/detail/functional/operators/relational_operators.h", - "cuda/include/thrust/detail/functional/operators/assignment_operator.h", - "cuda/include/thrust/detail/functional/operators/bitwise_operators.h", - "cuda/include/thrust/detail/functional/operators/operator_adaptors.h", - "cuda/include/thrust/detail/functional/operators/arithmetic_operators.h", - "cuda/include/thrust/detail/functional/operators/compound_assignment_operators.h", - "cuda/include/thrust/detail/functional/argument.h", - "cuda/include/thrust/detail/functional/placeholder.h", - "cuda/include/thrust/detail/functional/actor.inl", - "cuda/include/thrust/detail/functional/composite.h", - "cuda/include/thrust/detail/static_map.h", - "cuda/include/thrust/detail/type_traits/has_nested_type.h", - "cuda/include/thrust/detail/type_traits/is_call_possible.h", - "cuda/include/thrust/detail/type_traits/function_traits.h", - "cuda/include/thrust/detail/type_traits/pointer_traits.h", - "cuda/include/thrust/detail/type_traits/has_member_function.h", - "cuda/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h", - "cuda/include/thrust/detail/type_traits/minimum_type.h", - "cuda/include/thrust/detail/type_traits/has_trivial_assign.h", - "cuda/include/thrust/detail/type_traits/is_metafunction_defined.h", - "cuda/include/thrust/detail/type_traits/iterator/is_discard_iterator.h", - "cuda/include/thrust/detail/type_traits/iterator/is_output_iterator.h", - "cuda/include/thrust/detail/type_traits/result_of_adaptable_function.h", - "cuda/include/thrust/detail/reference.h", - "cuda/include/thrust/detail/inner_product.inl", - "cuda/include/thrust/detail/use_default.h", - "cuda/include/thrust/detail/sequence.inl", - "cuda/include/thrust/detail/sort.inl", - "cuda/include/thrust/detail/equal.inl", - "cuda/include/thrust/detail/execution_policy.h", - "cuda/include/thrust/detail/integer_traits.h", - "cuda/include/thrust/detail/type_traits.h", - 
"cuda/include/thrust/detail/reverse.inl", - "cuda/include/thrust/detail/tabulate.inl", - "cuda/include/thrust/detail/unique.inl", - "cuda/include/thrust/detail/scatter.inl", - "cuda/include/thrust/detail/set_operations.inl", - "cuda/include/thrust/detail/device_malloc.inl", - "cuda/include/thrust/detail/copy_if.inl", - "cuda/include/thrust/detail/fill.inl", - "cuda/include/thrust/detail/temporary_array.inl", - "cuda/include/thrust/detail/transform_scan.inl", - "cuda/include/thrust/detail/minmax.h", - "cuda/include/thrust/detail/swap.inl", - "cuda/include/thrust/detail/pointer.inl", - "cuda/include/thrust/detail/transform_reduce.inl", - "cuda/include/thrust/detail/config.h", - "cuda/include/thrust/detail/distance.inl", - "cuda/include/thrust/detail/pair.inl", - "cuda/include/thrust/detail/allocator/temporary_allocator.h", - "cuda/include/thrust/detail/allocator/tagged_allocator.h", - "cuda/include/thrust/detail/allocator/destroy_range.inl", - "cuda/include/thrust/detail/allocator/destroy_range.h", - "cuda/include/thrust/detail/allocator/no_throw_allocator.h", - "cuda/include/thrust/detail/allocator/default_construct_range.inl", - "cuda/include/thrust/detail/allocator/fill_construct_range.inl", - "cuda/include/thrust/detail/allocator/tagged_allocator.inl", - "cuda/include/thrust/detail/allocator/malloc_allocator.h", - "cuda/include/thrust/detail/allocator/allocator_traits.h", - "cuda/include/thrust/detail/allocator/copy_construct_range.h", - "cuda/include/thrust/detail/allocator/allocator_traits.inl", - "cuda/include/thrust/detail/allocator/default_construct_range.h", - "cuda/include/thrust/detail/allocator/copy_construct_range.inl", - "cuda/include/thrust/detail/allocator/malloc_allocator.inl", - "cuda/include/thrust/detail/allocator/temporary_allocator.inl", - "cuda/include/thrust/detail/allocator/fill_construct_range.h", - "cuda/include/thrust/detail/temporary_buffer.h", - "cuda/include/thrust/detail/reduce.inl", - "cuda/include/thrust/detail/device_new.inl", - 
"cuda/include/thrust/detail/pointer.h", - "cuda/include/thrust/detail/for_each.inl", - "cuda/include/thrust/detail/generate.inl", - "cuda/include/thrust/detail/dispatch/is_trivial_copy.h", - "cuda/include/thrust/detail/adjacent_difference.inl", - "cuda/include/thrust/detail/tuple_meta_transform.h", - "cuda/include/thrust/detail/functional.inl", - "cuda/include/thrust/detail/remove.inl", - "cuda/include/thrust/detail/tuple_transform.h", - "cuda/include/thrust/detail/merge.inl", - "cuda/include/thrust/detail/extrema.inl", - "cuda/include/thrust/detail/trivial_sequence.h", - "cuda/include/thrust/detail/vector_base.h", - "cuda/include/thrust/detail/count.inl", - "cuda/include/thrust/detail/uninitialized_copy.inl", - "cuda/include/thrust/detail/function.h", - "cuda/include/thrust/detail/swap_ranges.inl", - "cuda/include/thrust/detail/device_delete.inl", - "cuda/include/thrust/detail/static_assert.h", - "cuda/include/thrust/detail/logical.inl", - "cuda/include/thrust/detail/seq.h", - "cuda/include/thrust/detail/mpl/math.h", - "cuda/include/thrust/detail/mismatch.inl", - "cuda/include/thrust/detail/internal_functional.h", - "cuda/include/thrust/detail/get_iterator_value.h", - "cuda/include/thrust/detail/copy.inl", - "cuda/include/thrust/detail/copy.h", + "cuda/include/thrust/detail/complex/arithmetic.h", + "cuda/include/thrust/detail/complex/c99math.h", + "cuda/include/thrust/detail/complex/catrig.h", "cuda/include/thrust/detail/complex/catrigf.h", - "cuda/include/thrust/detail/complex/cpowf.h", - "cuda/include/thrust/detail/complex/csqrtf.h", + "cuda/include/thrust/detail/complex/ccosh.h", "cuda/include/thrust/detail/complex/ccoshf.h", - "cuda/include/thrust/detail/complex/csinhf.h", + "cuda/include/thrust/detail/complex/cexp.h", + "cuda/include/thrust/detail/complex/cexpf.h", + "cuda/include/thrust/detail/complex/clog.h", "cuda/include/thrust/detail/complex/clogf.h", - "cuda/include/thrust/detail/complex/ccosh.h", - "cuda/include/thrust/detail/complex/arithmetic.h", - 
"cuda/include/thrust/detail/complex/csqrt.h", - "cuda/include/thrust/detail/complex/cpow.h", "cuda/include/thrust/detail/complex/complex.inl", - "cuda/include/thrust/detail/complex/math_private.h", - "cuda/include/thrust/detail/complex/c99math.h", + "cuda/include/thrust/detail/complex/cpow.h", + "cuda/include/thrust/detail/complex/cpowf.h", "cuda/include/thrust/detail/complex/cproj.h", - "cuda/include/thrust/detail/complex/catrig.h", - "cuda/include/thrust/detail/complex/ctanhf.h", - "cuda/include/thrust/detail/complex/cexpf.h", "cuda/include/thrust/detail/complex/csinh.h", - "cuda/include/thrust/detail/complex/stream.h", + "cuda/include/thrust/detail/complex/csinhf.h", + "cuda/include/thrust/detail/complex/csqrt.h", + "cuda/include/thrust/detail/complex/csqrtf.h", "cuda/include/thrust/detail/complex/ctanh.h", - "cuda/include/thrust/detail/complex/cexp.h", - "cuda/include/thrust/detail/complex/clog.h", - "cuda/include/thrust/detail/range/head_flags.h", - "cuda/include/thrust/detail/range/tail_flags.h", - "cuda/include/thrust/detail/execute_with_allocator.h", - "cuda/include/thrust/detail/integer_math.h", - "cuda/include/thrust/detail/swap.h", - "cuda/include/thrust/detail/uninitialized_fill.inl", - "cuda/include/thrust/detail/scan.inl", - "cuda/include/thrust/detail/gather.inl", - "cuda/include/thrust/detail/reference_forward_declaration.h", - "cuda/include/thrust/detail/numeric_traits.h", - "cuda/include/thrust/detail/reference.inl", - "cuda/include/thrust/detail/cstdint.h", - "cuda/include/thrust/detail/device_free.inl", - "cuda/include/thrust/detail/copy_if.h", - "cuda/include/thrust/detail/partition.inl", - "cuda/include/thrust/detail/find.inl", - "cuda/include/thrust/detail/config/forceinline.h", - "cuda/include/thrust/detail/config/debug.h", - "cuda/include/thrust/detail/config/config.h", - "cuda/include/thrust/detail/config/host_device.h", - "cuda/include/thrust/detail/config/host_system.h", + "cuda/include/thrust/detail/complex/ctanhf.h", + 
"cuda/include/thrust/detail/complex/math_private.h", + "cuda/include/thrust/detail/complex/stream.h", + "cuda/include/thrust/detail/config.h", "cuda/include/thrust/detail/config/compiler.h", - "cuda/include/thrust/detail/config/device_system.h", "cuda/include/thrust/detail/config/compiler_fence.h", + "cuda/include/thrust/detail/config/config.h", + "cuda/include/thrust/detail/config/debug.h", + "cuda/include/thrust/detail/config/device_system.h", "cuda/include/thrust/detail/config/exec_check_disable.h", - "cuda/include/thrust/detail/config/simple_defines.h", + "cuda/include/thrust/detail/config/forceinline.h", "cuda/include/thrust/detail/config/global_workarounds.h", - "cuda/include/thrust/detail/replace.inl", + "cuda/include/thrust/detail/config/host_device.h", + "cuda/include/thrust/detail/config/host_system.h", + "cuda/include/thrust/detail/config/simple_defines.h", + "cuda/include/thrust/detail/contiguous_storage.h", + "cuda/include/thrust/detail/contiguous_storage.inl", + "cuda/include/thrust/detail/copy.h", + "cuda/include/thrust/detail/copy.inl", + "cuda/include/thrust/detail/copy_if.h", + "cuda/include/thrust/detail/copy_if.inl", + "cuda/include/thrust/detail/count.inl", + "cuda/include/thrust/detail/cstdint.h", + "cuda/include/thrust/detail/device_delete.inl", + "cuda/include/thrust/detail/device_free.inl", + "cuda/include/thrust/detail/device_malloc.inl", + "cuda/include/thrust/detail/device_new.inl", "cuda/include/thrust/detail/device_ptr.inl", - "cuda/include/thrust/detail/tuple.inl", - "cuda/include/thrust/detail/malloc_and_free.h", + "cuda/include/thrust/detail/device_reference.inl", + "cuda/include/thrust/detail/device_vector.inl", + "cuda/include/thrust/detail/dispatch/is_trivial_copy.h", + "cuda/include/thrust/detail/distance.inl", + "cuda/include/thrust/detail/equal.inl", + "cuda/include/thrust/detail/execute_with_allocator.h", + "cuda/include/thrust/detail/execution_policy.h", + "cuda/include/thrust/detail/extrema.inl", + 
"cuda/include/thrust/detail/fill.inl", + "cuda/include/thrust/detail/find.inl", + "cuda/include/thrust/detail/for_each.inl", + "cuda/include/thrust/detail/function.h", + "cuda/include/thrust/detail/functional.inl", + "cuda/include/thrust/detail/functional/actor.h", + "cuda/include/thrust/detail/functional/actor.inl", + "cuda/include/thrust/detail/functional/argument.h", + "cuda/include/thrust/detail/functional/composite.h", + "cuda/include/thrust/detail/functional/operators.h", + "cuda/include/thrust/detail/functional/operators/arithmetic_operators.h", + "cuda/include/thrust/detail/functional/operators/assignment_operator.h", + "cuda/include/thrust/detail/functional/operators/bitwise_operators.h", + "cuda/include/thrust/detail/functional/operators/compound_assignment_operators.h", + "cuda/include/thrust/detail/functional/operators/logical_operators.h", + "cuda/include/thrust/detail/functional/operators/operator_adaptors.h", + "cuda/include/thrust/detail/functional/operators/relational_operators.h", + "cuda/include/thrust/detail/functional/placeholder.h", + "cuda/include/thrust/detail/functional/value.h", + "cuda/include/thrust/detail/gather.inl", + "cuda/include/thrust/detail/generate.inl", + "cuda/include/thrust/detail/get_iterator_value.h", "cuda/include/thrust/detail/host_vector.inl", + "cuda/include/thrust/detail/inner_product.inl", + "cuda/include/thrust/detail/integer_math.h", + "cuda/include/thrust/detail/integer_traits.h", + "cuda/include/thrust/detail/internal_functional.h", + "cuda/include/thrust/detail/logical.inl", + "cuda/include/thrust/detail/malloc_and_free.h", + "cuda/include/thrust/detail/merge.inl", + "cuda/include/thrust/detail/minmax.h", + "cuda/include/thrust/detail/mismatch.inl", + "cuda/include/thrust/detail/mpl/math.h", + "cuda/include/thrust/detail/numeric_traits.h", + "cuda/include/thrust/detail/overlapped_copy.h", + "cuda/include/thrust/detail/pair.inl", + "cuda/include/thrust/detail/partition.inl", + 
"cuda/include/thrust/detail/pointer.h", + "cuda/include/thrust/detail/pointer.inl", + "cuda/include/thrust/detail/range/head_flags.h", + "cuda/include/thrust/detail/range/tail_flags.h", "cuda/include/thrust/detail/raw_pointer_cast.h", - "cuda/include/thrust/detail/advance.inl", - "cuda/include/thrust/detail/contiguous_storage.h", "cuda/include/thrust/detail/raw_reference_cast.h", - "cuda/include/thrust/detail/contiguous_storage.inl", - "cuda/include/thrust/reverse.h", - "cuda/include/thrust/device_malloc_allocator.h", - "cuda/include/thrust/scatter.h", - "cuda/include/thrust/pair.h", - "cuda/include/thrust/advance.h", - "cuda/include/thrust/find.h", - "cuda/include/thrust/device_ptr.h", - "cuda/include/thrust/generate.h", - "cuda/include/thrust/uninitialized_fill.h", - "cuda/include/thrust/system/system_error.h", - "cuda/include/thrust/system/detail/bad_alloc.h", - "cuda/include/thrust/system/detail/adl/transform_scan.h", - "cuda/include/thrust/system/detail/adl/unique_by_key.h", - "cuda/include/thrust/system/detail/adl/partition.h", - "cuda/include/thrust/system/detail/adl/unique.h", - "cuda/include/thrust/system/detail/adl/adjacent_difference.h", - "cuda/include/thrust/system/detail/adl/sequence.h", - "cuda/include/thrust/system/detail/adl/merge.h", - "cuda/include/thrust/system/detail/adl/transform_reduce.h", - "cuda/include/thrust/system/detail/adl/gather.h", - "cuda/include/thrust/system/detail/adl/sort.h", - "cuda/include/thrust/system/detail/adl/scan.h", - "cuda/include/thrust/system/detail/adl/temporary_buffer.h", - "cuda/include/thrust/system/detail/adl/scan_by_key.h", - "cuda/include/thrust/system/detail/adl/reverse.h", - "cuda/include/thrust/system/detail/adl/assign_value.h", - "cuda/include/thrust/system/detail/adl/scatter.h", - "cuda/include/thrust/system/detail/adl/find.h", - "cuda/include/thrust/system/detail/adl/generate.h", - "cuda/include/thrust/system/detail/adl/uninitialized_fill.h", - "cuda/include/thrust/system/detail/adl/remove.h", - 
"cuda/include/thrust/system/detail/adl/tabulate.h", - "cuda/include/thrust/system/detail/adl/for_each.h", - "cuda/include/thrust/system/detail/adl/reduce_by_key.h", - "cuda/include/thrust/system/detail/adl/reduce.h", - "cuda/include/thrust/system/detail/adl/equal.h", - "cuda/include/thrust/system/detail/adl/copy.h", - "cuda/include/thrust/system/detail/adl/swap_ranges.h", - "cuda/include/thrust/system/detail/adl/uninitialized_copy.h", - "cuda/include/thrust/system/detail/adl/binary_search.h", - "cuda/include/thrust/system/detail/adl/set_operations.h", - "cuda/include/thrust/system/detail/adl/mismatch.h", - "cuda/include/thrust/system/detail/adl/extrema.h", - "cuda/include/thrust/system/detail/adl/count.h", - "cuda/include/thrust/system/detail/adl/replace.h", + "cuda/include/thrust/detail/reduce.inl", + "cuda/include/thrust/detail/reference.h", + "cuda/include/thrust/detail/reference.inl", + "cuda/include/thrust/detail/reference_forward_declaration.h", + "cuda/include/thrust/detail/remove.inl", + "cuda/include/thrust/detail/replace.inl", + "cuda/include/thrust/detail/reverse.inl", + "cuda/include/thrust/detail/scan.inl", + "cuda/include/thrust/detail/scatter.inl", + "cuda/include/thrust/detail/seq.h", + "cuda/include/thrust/detail/sequence.inl", + "cuda/include/thrust/detail/set_operations.inl", + "cuda/include/thrust/detail/sort.inl", + "cuda/include/thrust/detail/static_assert.h", + "cuda/include/thrust/detail/static_map.h", + "cuda/include/thrust/detail/swap.h", + "cuda/include/thrust/detail/swap.inl", + "cuda/include/thrust/detail/swap_ranges.inl", + "cuda/include/thrust/detail/tabulate.inl", + "cuda/include/thrust/detail/temporary_array.h", + "cuda/include/thrust/detail/temporary_array.inl", + "cuda/include/thrust/detail/temporary_buffer.h", + "cuda/include/thrust/detail/transform.inl", + "cuda/include/thrust/detail/transform_reduce.inl", + "cuda/include/thrust/detail/transform_scan.inl", + "cuda/include/thrust/detail/trivial_sequence.h", + 
"cuda/include/thrust/detail/tuple.inl", + "cuda/include/thrust/detail/tuple_meta_transform.h", + "cuda/include/thrust/detail/tuple_transform.h", + "cuda/include/thrust/detail/type_traits.h", + "cuda/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h", + "cuda/include/thrust/detail/type_traits/function_traits.h", + "cuda/include/thrust/detail/type_traits/has_member_function.h", + "cuda/include/thrust/detail/type_traits/has_nested_type.h", + "cuda/include/thrust/detail/type_traits/has_trivial_assign.h", + "cuda/include/thrust/detail/type_traits/is_call_possible.h", + "cuda/include/thrust/detail/type_traits/is_metafunction_defined.h", + "cuda/include/thrust/detail/type_traits/iterator/is_discard_iterator.h", + "cuda/include/thrust/detail/type_traits/iterator/is_output_iterator.h", + "cuda/include/thrust/detail/type_traits/minimum_type.h", + "cuda/include/thrust/detail/type_traits/pointer_traits.h", + "cuda/include/thrust/detail/type_traits/result_of_adaptable_function.h", + "cuda/include/thrust/detail/uninitialized_copy.inl", + "cuda/include/thrust/detail/uninitialized_fill.inl", + "cuda/include/thrust/detail/unique.inl", + "cuda/include/thrust/detail/use_default.h", + "cuda/include/thrust/detail/util/align.h", + "cuda/include/thrust/detail/util/blocking.h", + "cuda/include/thrust/detail/vector_base.h", + "cuda/include/thrust/detail/vector_base.inl", + "cuda/include/thrust/device_allocator.h", + "cuda/include/thrust/device_delete.h", + "cuda/include/thrust/device_free.h", + "cuda/include/thrust/device_malloc.h", + "cuda/include/thrust/device_malloc_allocator.h", + "cuda/include/thrust/device_new.h", + "cuda/include/thrust/device_new_allocator.h", + "cuda/include/thrust/device_ptr.h", + "cuda/include/thrust/device_reference.h", + "cuda/include/thrust/device_vector.h", + "cuda/include/thrust/distance.h", + "cuda/include/thrust/equal.h", + "cuda/include/thrust/execution_policy.h", + "cuda/include/thrust/extrema.h", + 
"cuda/include/thrust/fill.h", + "cuda/include/thrust/find.h", + "cuda/include/thrust/for_each.h", + "cuda/include/thrust/functional.h", + "cuda/include/thrust/gather.h", + "cuda/include/thrust/generate.h", + "cuda/include/thrust/host_vector.h", + "cuda/include/thrust/inner_product.h", + "cuda/include/thrust/iterator/constant_iterator.h", + "cuda/include/thrust/iterator/counting_iterator.h", + "cuda/include/thrust/iterator/detail/any_assign.h", + "cuda/include/thrust/iterator/detail/any_system_tag.h", + "cuda/include/thrust/iterator/detail/constant_iterator_base.h", + "cuda/include/thrust/iterator/detail/counting_iterator.inl", + "cuda/include/thrust/iterator/detail/device_system_tag.h", + "cuda/include/thrust/iterator/detail/discard_iterator_base.h", + "cuda/include/thrust/iterator/detail/distance_from_result.h", + "cuda/include/thrust/iterator/detail/host_system_tag.h", + "cuda/include/thrust/iterator/detail/is_iterator_category.h", + "cuda/include/thrust/iterator/detail/is_trivial_iterator.h", + "cuda/include/thrust/iterator/detail/iterator_adaptor_base.h", + "cuda/include/thrust/iterator/detail/iterator_category_to_system.h", + "cuda/include/thrust/iterator/detail/iterator_category_to_traversal.h", + "cuda/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h", + "cuda/include/thrust/iterator/detail/iterator_facade_category.h", + "cuda/include/thrust/iterator/detail/iterator_traits.inl", + "cuda/include/thrust/iterator/detail/iterator_traversal_tags.h", + "cuda/include/thrust/iterator/detail/join_iterator.h", + "cuda/include/thrust/iterator/detail/minimum_category.h", + "cuda/include/thrust/iterator/detail/minimum_system.h", + "cuda/include/thrust/iterator/detail/normal_iterator.h", + "cuda/include/thrust/iterator/detail/permutation_iterator_base.h", + "cuda/include/thrust/iterator/detail/retag.h", + "cuda/include/thrust/iterator/detail/reverse_iterator.inl", + "cuda/include/thrust/iterator/detail/reverse_iterator_base.h", + 
"cuda/include/thrust/iterator/detail/tagged_iterator.h", + "cuda/include/thrust/iterator/detail/transform_iterator.inl", + "cuda/include/thrust/iterator/detail/transform_output_iterator.inl", + "cuda/include/thrust/iterator/detail/tuple_of_iterator_references.h", + "cuda/include/thrust/iterator/detail/universal_categories.h", + "cuda/include/thrust/iterator/detail/zip_iterator.inl", + "cuda/include/thrust/iterator/detail/zip_iterator_base.h", + "cuda/include/thrust/iterator/discard_iterator.h", + "cuda/include/thrust/iterator/iterator_adaptor.h", + "cuda/include/thrust/iterator/iterator_categories.h", + "cuda/include/thrust/iterator/iterator_facade.h", + "cuda/include/thrust/iterator/iterator_traits.h", + "cuda/include/thrust/iterator/permutation_iterator.h", + "cuda/include/thrust/iterator/retag.h", + "cuda/include/thrust/iterator/reverse_iterator.h", + "cuda/include/thrust/iterator/transform_iterator.h", + "cuda/include/thrust/iterator/transform_output_iterator.h", + "cuda/include/thrust/iterator/zip_iterator.h", + "cuda/include/thrust/logical.h", + "cuda/include/thrust/memory.h", + "cuda/include/thrust/merge.h", + "cuda/include/thrust/mismatch.h", + "cuda/include/thrust/pair.h", + "cuda/include/thrust/partition.h", + "cuda/include/thrust/random.h", + "cuda/include/thrust/random/detail/discard_block_engine.inl", + "cuda/include/thrust/random/detail/linear_congruential_engine.inl", + "cuda/include/thrust/random/detail/linear_congruential_engine_discard.h", + "cuda/include/thrust/random/detail/linear_feedback_shift_engine.inl", + "cuda/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h", + "cuda/include/thrust/random/detail/mod.h", + "cuda/include/thrust/random/detail/normal_distribution.inl", + "cuda/include/thrust/random/detail/normal_distribution_base.h", + "cuda/include/thrust/random/detail/random_core_access.h", + "cuda/include/thrust/random/detail/subtract_with_carry_engine.inl", + 
"cuda/include/thrust/random/detail/uniform_int_distribution.inl", + "cuda/include/thrust/random/detail/uniform_real_distribution.inl", + "cuda/include/thrust/random/detail/xor_combine_engine.inl", + "cuda/include/thrust/random/detail/xor_combine_engine_max.h", + "cuda/include/thrust/random/discard_block_engine.h", + "cuda/include/thrust/random/linear_congruential_engine.h", + "cuda/include/thrust/random/linear_feedback_shift_engine.h", + "cuda/include/thrust/random/normal_distribution.h", + "cuda/include/thrust/random/subtract_with_carry_engine.h", + "cuda/include/thrust/random/uniform_int_distribution.h", + "cuda/include/thrust/random/uniform_real_distribution.h", + "cuda/include/thrust/random/xor_combine_engine.h", + "cuda/include/thrust/reduce.h", + "cuda/include/thrust/remove.h", + "cuda/include/thrust/replace.h", + "cuda/include/thrust/reverse.h", + "cuda/include/thrust/scan.h", + "cuda/include/thrust/scatter.h", + "cuda/include/thrust/sequence.h", + "cuda/include/thrust/set_operations.h", + "cuda/include/thrust/sort.h", + "cuda/include/thrust/swap.h", + "cuda/include/thrust/system/cpp/detail/adjacent_difference.h", + "cuda/include/thrust/system/cpp/detail/assign_value.h", + "cuda/include/thrust/system/cpp/detail/binary_search.h", + "cuda/include/thrust/system/cpp/detail/copy.h", + "cuda/include/thrust/system/cpp/detail/copy_if.h", + "cuda/include/thrust/system/cpp/detail/count.h", + "cuda/include/thrust/system/cpp/detail/equal.h", + "cuda/include/thrust/system/cpp/detail/execution_policy.h", + "cuda/include/thrust/system/cpp/detail/extrema.h", + "cuda/include/thrust/system/cpp/detail/fill.h", + "cuda/include/thrust/system/cpp/detail/find.h", + "cuda/include/thrust/system/cpp/detail/for_each.h", + "cuda/include/thrust/system/cpp/detail/gather.h", + "cuda/include/thrust/system/cpp/detail/generate.h", + "cuda/include/thrust/system/cpp/detail/get_value.h", + "cuda/include/thrust/system/cpp/detail/inner_product.h", + 
"cuda/include/thrust/system/cpp/detail/iter_swap.h", + "cuda/include/thrust/system/cpp/detail/logical.h", + "cuda/include/thrust/system/cpp/detail/malloc_and_free.h", + "cuda/include/thrust/system/cpp/detail/memory.inl", + "cuda/include/thrust/system/cpp/detail/merge.h", + "cuda/include/thrust/system/cpp/detail/mismatch.h", + "cuda/include/thrust/system/cpp/detail/par.h", + "cuda/include/thrust/system/cpp/detail/partition.h", + "cuda/include/thrust/system/cpp/detail/reduce.h", + "cuda/include/thrust/system/cpp/detail/reduce_by_key.h", + "cuda/include/thrust/system/cpp/detail/remove.h", + "cuda/include/thrust/system/cpp/detail/replace.h", + "cuda/include/thrust/system/cpp/detail/reverse.h", + "cuda/include/thrust/system/cpp/detail/scan.h", + "cuda/include/thrust/system/cpp/detail/scan_by_key.h", + "cuda/include/thrust/system/cpp/detail/scatter.h", + "cuda/include/thrust/system/cpp/detail/sequence.h", + "cuda/include/thrust/system/cpp/detail/set_operations.h", + "cuda/include/thrust/system/cpp/detail/sort.h", + "cuda/include/thrust/system/cpp/detail/swap_ranges.h", + "cuda/include/thrust/system/cpp/detail/tabulate.h", + "cuda/include/thrust/system/cpp/detail/temporary_buffer.h", + "cuda/include/thrust/system/cpp/detail/transform.h", + "cuda/include/thrust/system/cpp/detail/transform_reduce.h", + "cuda/include/thrust/system/cpp/detail/transform_scan.h", + "cuda/include/thrust/system/cpp/detail/uninitialized_copy.h", + "cuda/include/thrust/system/cpp/detail/uninitialized_fill.h", + "cuda/include/thrust/system/cpp/detail/unique.h", + "cuda/include/thrust/system/cpp/detail/unique_by_key.h", + "cuda/include/thrust/system/cpp/detail/vector.inl", + "cuda/include/thrust/system/cpp/execution_policy.h", + "cuda/include/thrust/system/cpp/memory.h", + "cuda/include/thrust/system/cpp/vector.h", + "cuda/include/thrust/system/cuda/config.h", + "cuda/include/thrust/system/cuda/detail/adjacent_difference.h", + "cuda/include/thrust/system/cuda/detail/assign_value.h", + 
"cuda/include/thrust/system/cuda/detail/binary_search.h", + "cuda/include/thrust/system/cuda/detail/copy.h", + "cuda/include/thrust/system/cuda/detail/copy_if.h", + "cuda/include/thrust/system/cuda/detail/core/agent_launcher.h", + "cuda/include/thrust/system/cuda/detail/core/alignment.h", + "cuda/include/thrust/system/cuda/detail/core/triple_chevron_launch.h", + "cuda/include/thrust/system/cuda/detail/core/util.h", + "cuda/include/thrust/system/cuda/detail/count.h", + "cuda/include/thrust/system/cuda/detail/cross_system.h", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_histogram.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_upsweep.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_rle.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_scan.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_segment_fixup.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_select_if.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_csrt.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_orig.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_row_based.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/single_pass_scan_operators.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_adjacent_difference.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_load.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh", + 
"cuda/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_scan.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_shuffle.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_store.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans2.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans3.cuh", + "cuda/include/thrust/system/cuda/detail/cub/cub.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_partition.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_scan.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_radix_sort.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_reduce.cuh", + 
"cuda/include/thrust/system/cuda/detail/cub/device/device_select.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_spmv.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_histogram.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_radix_sort.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce_by_key.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_rle.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_scan.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_select_if.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_csrt.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_orig.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_row_based.cuh", + "cuda/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh", + "cuda/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh", + "cuda/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh", + "cuda/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh", + "cuda/include/thrust/system/cuda/detail/cub/host/mutex.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/discard_output_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh", + 
"cuda/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_search.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_allocator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_arch.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_debug.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_device.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_macro.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_namespace.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_ptx.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_type.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh", + "cuda/include/thrust/system/cuda/detail/equal.h", + "cuda/include/thrust/system/cuda/detail/error.inl", + "cuda/include/thrust/system/cuda/detail/execution_policy.h", + "cuda/include/thrust/system/cuda/detail/extrema.h", + "cuda/include/thrust/system/cuda/detail/fill.h", + "cuda/include/thrust/system/cuda/detail/find.h", + "cuda/include/thrust/system/cuda/detail/for_each.h", + 
"cuda/include/thrust/system/cuda/detail/gather.h", + "cuda/include/thrust/system/cuda/detail/generate.h", + "cuda/include/thrust/system/cuda/detail/get_value.h", + "cuda/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h", + "cuda/include/thrust/system/cuda/detail/guarded_driver_types.h", + "cuda/include/thrust/system/cuda/detail/inner_product.h", + "cuda/include/thrust/system/cuda/detail/internal/copy_cross_system.h", + "cuda/include/thrust/system/cuda/detail/internal/copy_device_to_device.h", + "cuda/include/thrust/system/cuda/detail/iter_swap.h", + "cuda/include/thrust/system/cuda/detail/logical.h", + "cuda/include/thrust/system/cuda/detail/malloc_and_free.h", + "cuda/include/thrust/system/cuda/detail/memory.inl", + "cuda/include/thrust/system/cuda/detail/memory_buffer.h", + "cuda/include/thrust/system/cuda/detail/merge.h", + "cuda/include/thrust/system/cuda/detail/mismatch.h", + "cuda/include/thrust/system/cuda/detail/par.h", + "cuda/include/thrust/system/cuda/detail/par_to_seq.h", + "cuda/include/thrust/system/cuda/detail/parallel_for.h", + "cuda/include/thrust/system/cuda/detail/partition.h", + "cuda/include/thrust/system/cuda/detail/reduce.h", + "cuda/include/thrust/system/cuda/detail/reduce_by_key.h", + "cuda/include/thrust/system/cuda/detail/remove.h", + "cuda/include/thrust/system/cuda/detail/replace.h", + "cuda/include/thrust/system/cuda/detail/reverse.h", + "cuda/include/thrust/system/cuda/detail/scan.h", + "cuda/include/thrust/system/cuda/detail/scan_by_key.h", + "cuda/include/thrust/system/cuda/detail/scatter.h", + "cuda/include/thrust/system/cuda/detail/sequence.h", + "cuda/include/thrust/system/cuda/detail/set_operations.h", + "cuda/include/thrust/system/cuda/detail/sort.h", + "cuda/include/thrust/system/cuda/detail/swap_ranges.h", + "cuda/include/thrust/system/cuda/detail/tabulate.h", + "cuda/include/thrust/system/cuda/detail/temporary_buffer.h", + "cuda/include/thrust/system/cuda/detail/terminate.h", + 
"cuda/include/thrust/system/cuda/detail/transform.h", + "cuda/include/thrust/system/cuda/detail/transform_reduce.h", + "cuda/include/thrust/system/cuda/detail/transform_scan.h", + "cuda/include/thrust/system/cuda/detail/uninitialized_copy.h", + "cuda/include/thrust/system/cuda/detail/uninitialized_fill.h", + "cuda/include/thrust/system/cuda/detail/unique.h", + "cuda/include/thrust/system/cuda/detail/unique_by_key.h", + "cuda/include/thrust/system/cuda/detail/util.h", + "cuda/include/thrust/system/cuda/detail/vector.inl", + "cuda/include/thrust/system/cuda/error.h", + "cuda/include/thrust/system/cuda/execution_policy.h", + "cuda/include/thrust/system/cuda/experimental/pinned_allocator.h", + "cuda/include/thrust/system/cuda/memory.h", + "cuda/include/thrust/system/cuda/vector.h", + "cuda/include/thrust/system/detail/adl/adjacent_difference.h", + "cuda/include/thrust/system/detail/adl/assign_value.h", + "cuda/include/thrust/system/detail/adl/binary_search.h", + "cuda/include/thrust/system/detail/adl/copy.h", + "cuda/include/thrust/system/detail/adl/copy_if.h", + "cuda/include/thrust/system/detail/adl/count.h", + "cuda/include/thrust/system/detail/adl/equal.h", + "cuda/include/thrust/system/detail/adl/extrema.h", + "cuda/include/thrust/system/detail/adl/fill.h", + "cuda/include/thrust/system/detail/adl/find.h", + "cuda/include/thrust/system/detail/adl/for_each.h", + "cuda/include/thrust/system/detail/adl/gather.h", + "cuda/include/thrust/system/detail/adl/generate.h", "cuda/include/thrust/system/detail/adl/get_value.h", "cuda/include/thrust/system/detail/adl/inner_product.h", - "cuda/include/thrust/system/detail/adl/copy_if.h", - "cuda/include/thrust/system/detail/adl/logical.h", "cuda/include/thrust/system/detail/adl/iter_swap.h", + "cuda/include/thrust/system/detail/adl/logical.h", "cuda/include/thrust/system/detail/adl/malloc_and_free.h", - "cuda/include/thrust/system/detail/adl/fill.h", + "cuda/include/thrust/system/detail/adl/merge.h", + 
"cuda/include/thrust/system/detail/adl/mismatch.h", + "cuda/include/thrust/system/detail/adl/partition.h", + "cuda/include/thrust/system/detail/adl/reduce.h", + "cuda/include/thrust/system/detail/adl/reduce_by_key.h", + "cuda/include/thrust/system/detail/adl/remove.h", + "cuda/include/thrust/system/detail/adl/replace.h", + "cuda/include/thrust/system/detail/adl/reverse.h", + "cuda/include/thrust/system/detail/adl/scan.h", + "cuda/include/thrust/system/detail/adl/scan_by_key.h", + "cuda/include/thrust/system/detail/adl/scatter.h", + "cuda/include/thrust/system/detail/adl/sequence.h", + "cuda/include/thrust/system/detail/adl/set_operations.h", + "cuda/include/thrust/system/detail/adl/sort.h", + "cuda/include/thrust/system/detail/adl/swap_ranges.h", + "cuda/include/thrust/system/detail/adl/tabulate.h", + "cuda/include/thrust/system/detail/adl/temporary_buffer.h", "cuda/include/thrust/system/detail/adl/transform.h", + "cuda/include/thrust/system/detail/adl/transform_reduce.h", + "cuda/include/thrust/system/detail/adl/transform_scan.h", + "cuda/include/thrust/system/detail/adl/uninitialized_copy.h", + "cuda/include/thrust/system/detail/adl/uninitialized_fill.h", + "cuda/include/thrust/system/detail/adl/unique.h", + "cuda/include/thrust/system/detail/adl/unique_by_key.h", + "cuda/include/thrust/system/detail/bad_alloc.h", "cuda/include/thrust/system/detail/errno.h", "cuda/include/thrust/system/detail/error_category.inl", - "cuda/include/thrust/system/detail/sequential/transform_scan.h", - "cuda/include/thrust/system/detail/sequential/unique_by_key.h", - "cuda/include/thrust/system/detail/sequential/stable_primitive_sort.h", - "cuda/include/thrust/system/detail/sequential/stable_primitive_sort.inl", - "cuda/include/thrust/system/detail/sequential/stable_merge_sort.h", - "cuda/include/thrust/system/detail/sequential/sort.inl", - "cuda/include/thrust/system/detail/sequential/partition.h", - "cuda/include/thrust/system/detail/sequential/unique.h", - 
"cuda/include/thrust/system/detail/sequential/execution_policy.h", - "cuda/include/thrust/system/detail/sequential/adjacent_difference.h", - "cuda/include/thrust/system/detail/sequential/sequence.h", - "cuda/include/thrust/system/detail/sequential/merge.h", - "cuda/include/thrust/system/detail/sequential/transform_reduce.h", - "cuda/include/thrust/system/detail/sequential/gather.h", - "cuda/include/thrust/system/detail/sequential/sort.h", - "cuda/include/thrust/system/detail/sequential/copy_backward.h", - "cuda/include/thrust/system/detail/sequential/stable_radix_sort.inl", - "cuda/include/thrust/system/detail/sequential/scan.h", - "cuda/include/thrust/system/detail/sequential/temporary_buffer.h", - "cuda/include/thrust/system/detail/sequential/scan_by_key.h", - "cuda/include/thrust/system/detail/sequential/reverse.h", - "cuda/include/thrust/system/detail/sequential/assign_value.h", - "cuda/include/thrust/system/detail/sequential/scatter.h", - "cuda/include/thrust/system/detail/sequential/find.h", - "cuda/include/thrust/system/detail/sequential/stable_merge_sort.inl", - "cuda/include/thrust/system/detail/sequential/merge.inl", - "cuda/include/thrust/system/detail/sequential/generate.h", - "cuda/include/thrust/system/detail/sequential/uninitialized_fill.h", - "cuda/include/thrust/system/detail/sequential/general_copy.h", - "cuda/include/thrust/system/detail/sequential/insertion_sort.h", - "cuda/include/thrust/system/detail/sequential/remove.h", - "cuda/include/thrust/system/detail/sequential/tabulate.h", - "cuda/include/thrust/system/detail/sequential/for_each.h", - "cuda/include/thrust/system/detail/sequential/reduce_by_key.h", - "cuda/include/thrust/system/detail/sequential/reduce.h", - "cuda/include/thrust/system/detail/sequential/equal.h", - "cuda/include/thrust/system/detail/sequential/stable_radix_sort.h", - "cuda/include/thrust/system/detail/sequential/copy.inl", - "cuda/include/thrust/system/detail/sequential/copy.h", - 
"cuda/include/thrust/system/detail/sequential/swap_ranges.h", - "cuda/include/thrust/system/detail/sequential/uninitialized_copy.h", - "cuda/include/thrust/system/detail/sequential/binary_search.h", - "cuda/include/thrust/system/detail/sequential/set_operations.h", - "cuda/include/thrust/system/detail/sequential/mismatch.h", - "cuda/include/thrust/system/detail/sequential/extrema.h", - "cuda/include/thrust/system/detail/sequential/count.h", - "cuda/include/thrust/system/detail/sequential/trivial_copy.h", - "cuda/include/thrust/system/detail/sequential/replace.h", - "cuda/include/thrust/system/detail/sequential/get_value.h", - "cuda/include/thrust/system/detail/sequential/inner_product.h", - "cuda/include/thrust/system/detail/sequential/copy_if.h", - "cuda/include/thrust/system/detail/sequential/logical.h", - "cuda/include/thrust/system/detail/sequential/iter_swap.h", - "cuda/include/thrust/system/detail/sequential/malloc_and_free.h", - "cuda/include/thrust/system/detail/sequential/fill.h", - "cuda/include/thrust/system/detail/sequential/transform.h", - "cuda/include/thrust/system/detail/error_condition.inl", - "cuda/include/thrust/system/detail/internal/decompose.h", "cuda/include/thrust/system/detail/error_code.inl", - "cuda/include/thrust/system/detail/generic/transform_scan.h", - "cuda/include/thrust/system/detail/generic/memory.inl", - "cuda/include/thrust/system/detail/generic/transform.inl", - "cuda/include/thrust/system/detail/generic/binary_search.inl", - "cuda/include/thrust/system/detail/generic/scan_by_key.inl", - "cuda/include/thrust/system/detail/generic/unique_by_key.h", - "cuda/include/thrust/system/detail/generic/inner_product.inl", - "cuda/include/thrust/system/detail/generic/select_system.h", - "cuda/include/thrust/system/detail/generic/sequence.inl", - "cuda/include/thrust/system/detail/generic/sort.inl", - "cuda/include/thrust/system/detail/generic/equal.inl", - "cuda/include/thrust/system/detail/generic/partition.h", - 
"cuda/include/thrust/system/detail/generic/unique.h", + "cuda/include/thrust/system/detail/error_condition.inl", "cuda/include/thrust/system/detail/generic/adjacent_difference.h", - "cuda/include/thrust/system/detail/generic/tag.h", - "cuda/include/thrust/system/detail/generic/unique_by_key.inl", - "cuda/include/thrust/system/detail/generic/sequence.h", - "cuda/include/thrust/system/detail/generic/type_traits.h", - "cuda/include/thrust/system/detail/generic/merge.h", - "cuda/include/thrust/system/detail/generic/reverse.inl", - "cuda/include/thrust/system/detail/generic/tabulate.inl", - "cuda/include/thrust/system/detail/generic/unique.inl", - "cuda/include/thrust/system/detail/generic/scatter.inl", - "cuda/include/thrust/system/detail/generic/set_operations.inl", - "cuda/include/thrust/system/detail/generic/copy_if.inl", - "cuda/include/thrust/system/detail/generic/transform_reduce.h", - "cuda/include/thrust/system/detail/generic/transform_scan.inl", - "cuda/include/thrust/system/detail/generic/gather.h", - "cuda/include/thrust/system/detail/generic/reduce_by_key.inl", - "cuda/include/thrust/system/detail/generic/transform_reduce.inl", - "cuda/include/thrust/system/detail/generic/sort.h", - "cuda/include/thrust/system/detail/generic/distance.inl", - "cuda/include/thrust/system/detail/generic/scan.h", - "cuda/include/thrust/system/detail/generic/temporary_buffer.h", - "cuda/include/thrust/system/detail/generic/reduce.inl", - "cuda/include/thrust/system/detail/generic/scan_by_key.h", - "cuda/include/thrust/system/detail/generic/reverse.h", - "cuda/include/thrust/system/detail/generic/temporary_buffer.inl", - "cuda/include/thrust/system/detail/generic/scatter.h", - "cuda/include/thrust/system/detail/generic/generate.inl", "cuda/include/thrust/system/detail/generic/adjacent_difference.inl", - "cuda/include/thrust/system/detail/generic/remove.inl", "cuda/include/thrust/system/detail/generic/advance.h", - "cuda/include/thrust/system/detail/generic/find.h", - 
"cuda/include/thrust/system/detail/generic/merge.inl", - "cuda/include/thrust/system/detail/generic/scalar/binary_search.inl", - "cuda/include/thrust/system/detail/generic/scalar/binary_search.h", - "cuda/include/thrust/system/detail/generic/extrema.inl", - "cuda/include/thrust/system/detail/generic/generate.h", - "cuda/include/thrust/system/detail/generic/uninitialized_fill.h", + "cuda/include/thrust/system/detail/generic/advance.inl", + "cuda/include/thrust/system/detail/generic/binary_search.h", + "cuda/include/thrust/system/detail/generic/binary_search.inl", + "cuda/include/thrust/system/detail/generic/copy.h", + "cuda/include/thrust/system/detail/generic/copy.inl", + "cuda/include/thrust/system/detail/generic/copy_if.h", + "cuda/include/thrust/system/detail/generic/copy_if.inl", + "cuda/include/thrust/system/detail/generic/count.h", "cuda/include/thrust/system/detail/generic/count.inl", - "cuda/include/thrust/system/detail/generic/remove.h", - "cuda/include/thrust/system/detail/generic/uninitialized_copy.inl", - "cuda/include/thrust/system/detail/generic/tabulate.h", - "cuda/include/thrust/system/detail/generic/for_each.h", "cuda/include/thrust/system/detail/generic/distance.h", - "cuda/include/thrust/system/detail/generic/swap_ranges.inl", - "cuda/include/thrust/system/detail/generic/reduce_by_key.h", - "cuda/include/thrust/system/detail/generic/reduce.h", + "cuda/include/thrust/system/detail/generic/distance.inl", "cuda/include/thrust/system/detail/generic/equal.h", - "cuda/include/thrust/system/detail/generic/mismatch.inl", - "cuda/include/thrust/system/detail/generic/copy.inl", - "cuda/include/thrust/system/detail/generic/copy.h", - "cuda/include/thrust/system/detail/generic/swap_ranges.h", - "cuda/include/thrust/system/detail/generic/uninitialized_copy.h", - "cuda/include/thrust/system/detail/generic/binary_search.h", - "cuda/include/thrust/system/detail/generic/set_operations.h", - "cuda/include/thrust/system/detail/generic/uninitialized_fill.inl", - 
"cuda/include/thrust/system/detail/generic/mismatch.h", - "cuda/include/thrust/system/detail/generic/scan.inl", - "cuda/include/thrust/system/detail/generic/gather.inl", + "cuda/include/thrust/system/detail/generic/equal.inl", "cuda/include/thrust/system/detail/generic/extrema.h", - "cuda/include/thrust/system/detail/generic/count.h", - "cuda/include/thrust/system/detail/generic/replace.h", + "cuda/include/thrust/system/detail/generic/extrema.inl", + "cuda/include/thrust/system/detail/generic/fill.h", + "cuda/include/thrust/system/detail/generic/find.h", + "cuda/include/thrust/system/detail/generic/find.inl", + "cuda/include/thrust/system/detail/generic/for_each.h", + "cuda/include/thrust/system/detail/generic/gather.h", + "cuda/include/thrust/system/detail/generic/gather.inl", + "cuda/include/thrust/system/detail/generic/generate.h", + "cuda/include/thrust/system/detail/generic/generate.inl", "cuda/include/thrust/system/detail/generic/inner_product.h", - "cuda/include/thrust/system/detail/generic/copy_if.h", + "cuda/include/thrust/system/detail/generic/inner_product.inl", "cuda/include/thrust/system/detail/generic/logical.h", - "cuda/include/thrust/system/detail/generic/partition.inl", "cuda/include/thrust/system/detail/generic/memory.h", - "cuda/include/thrust/system/detail/generic/find.inl", + "cuda/include/thrust/system/detail/generic/memory.inl", + "cuda/include/thrust/system/detail/generic/merge.h", + "cuda/include/thrust/system/detail/generic/merge.inl", + "cuda/include/thrust/system/detail/generic/mismatch.h", + "cuda/include/thrust/system/detail/generic/mismatch.inl", + "cuda/include/thrust/system/detail/generic/partition.h", + "cuda/include/thrust/system/detail/generic/partition.inl", + "cuda/include/thrust/system/detail/generic/reduce.h", + "cuda/include/thrust/system/detail/generic/reduce.inl", + "cuda/include/thrust/system/detail/generic/reduce_by_key.h", + "cuda/include/thrust/system/detail/generic/reduce_by_key.inl", + 
"cuda/include/thrust/system/detail/generic/remove.h", + "cuda/include/thrust/system/detail/generic/remove.inl", + "cuda/include/thrust/system/detail/generic/replace.h", "cuda/include/thrust/system/detail/generic/replace.inl", - "cuda/include/thrust/system/detail/generic/advance.inl", - "cuda/include/thrust/system/detail/generic/fill.h", + "cuda/include/thrust/system/detail/generic/reverse.h", + "cuda/include/thrust/system/detail/generic/reverse.inl", + "cuda/include/thrust/system/detail/generic/scalar/binary_search.h", + "cuda/include/thrust/system/detail/generic/scalar/binary_search.inl", + "cuda/include/thrust/system/detail/generic/scan.h", + "cuda/include/thrust/system/detail/generic/scan.inl", + "cuda/include/thrust/system/detail/generic/scan_by_key.h", + "cuda/include/thrust/system/detail/generic/scan_by_key.inl", + "cuda/include/thrust/system/detail/generic/scatter.h", + "cuda/include/thrust/system/detail/generic/scatter.inl", + "cuda/include/thrust/system/detail/generic/select_system.h", + "cuda/include/thrust/system/detail/generic/sequence.h", + "cuda/include/thrust/system/detail/generic/sequence.inl", + "cuda/include/thrust/system/detail/generic/set_operations.h", + "cuda/include/thrust/system/detail/generic/set_operations.inl", + "cuda/include/thrust/system/detail/generic/sort.h", + "cuda/include/thrust/system/detail/generic/sort.inl", + "cuda/include/thrust/system/detail/generic/swap_ranges.h", + "cuda/include/thrust/system/detail/generic/swap_ranges.inl", + "cuda/include/thrust/system/detail/generic/tabulate.h", + "cuda/include/thrust/system/detail/generic/tabulate.inl", + "cuda/include/thrust/system/detail/generic/tag.h", + "cuda/include/thrust/system/detail/generic/temporary_buffer.h", + "cuda/include/thrust/system/detail/generic/temporary_buffer.inl", "cuda/include/thrust/system/detail/generic/transform.h", + "cuda/include/thrust/system/detail/generic/transform.inl", + "cuda/include/thrust/system/detail/generic/transform_reduce.h", + 
"cuda/include/thrust/system/detail/generic/transform_reduce.inl", + "cuda/include/thrust/system/detail/generic/transform_scan.h", + "cuda/include/thrust/system/detail/generic/transform_scan.inl", + "cuda/include/thrust/system/detail/generic/type_traits.h", + "cuda/include/thrust/system/detail/generic/uninitialized_copy.h", + "cuda/include/thrust/system/detail/generic/uninitialized_copy.inl", + "cuda/include/thrust/system/detail/generic/uninitialized_fill.h", + "cuda/include/thrust/system/detail/generic/uninitialized_fill.inl", + "cuda/include/thrust/system/detail/generic/unique.h", + "cuda/include/thrust/system/detail/generic/unique.inl", + "cuda/include/thrust/system/detail/generic/unique_by_key.h", + "cuda/include/thrust/system/detail/generic/unique_by_key.inl", + "cuda/include/thrust/system/detail/internal/decompose.h", + "cuda/include/thrust/system/detail/sequential/adjacent_difference.h", + "cuda/include/thrust/system/detail/sequential/assign_value.h", + "cuda/include/thrust/system/detail/sequential/binary_search.h", + "cuda/include/thrust/system/detail/sequential/copy.h", + "cuda/include/thrust/system/detail/sequential/copy.inl", + "cuda/include/thrust/system/detail/sequential/copy_backward.h", + "cuda/include/thrust/system/detail/sequential/copy_if.h", + "cuda/include/thrust/system/detail/sequential/count.h", + "cuda/include/thrust/system/detail/sequential/equal.h", + "cuda/include/thrust/system/detail/sequential/execution_policy.h", + "cuda/include/thrust/system/detail/sequential/extrema.h", + "cuda/include/thrust/system/detail/sequential/fill.h", + "cuda/include/thrust/system/detail/sequential/find.h", + "cuda/include/thrust/system/detail/sequential/for_each.h", + "cuda/include/thrust/system/detail/sequential/gather.h", + "cuda/include/thrust/system/detail/sequential/general_copy.h", + "cuda/include/thrust/system/detail/sequential/generate.h", + "cuda/include/thrust/system/detail/sequential/get_value.h", + 
"cuda/include/thrust/system/detail/sequential/inner_product.h", + "cuda/include/thrust/system/detail/sequential/insertion_sort.h", + "cuda/include/thrust/system/detail/sequential/iter_swap.h", + "cuda/include/thrust/system/detail/sequential/logical.h", + "cuda/include/thrust/system/detail/sequential/malloc_and_free.h", + "cuda/include/thrust/system/detail/sequential/merge.h", + "cuda/include/thrust/system/detail/sequential/merge.inl", + "cuda/include/thrust/system/detail/sequential/mismatch.h", + "cuda/include/thrust/system/detail/sequential/partition.h", + "cuda/include/thrust/system/detail/sequential/reduce.h", + "cuda/include/thrust/system/detail/sequential/reduce_by_key.h", + "cuda/include/thrust/system/detail/sequential/remove.h", + "cuda/include/thrust/system/detail/sequential/replace.h", + "cuda/include/thrust/system/detail/sequential/reverse.h", + "cuda/include/thrust/system/detail/sequential/scan.h", + "cuda/include/thrust/system/detail/sequential/scan_by_key.h", + "cuda/include/thrust/system/detail/sequential/scatter.h", + "cuda/include/thrust/system/detail/sequential/sequence.h", + "cuda/include/thrust/system/detail/sequential/set_operations.h", + "cuda/include/thrust/system/detail/sequential/sort.h", + "cuda/include/thrust/system/detail/sequential/sort.inl", + "cuda/include/thrust/system/detail/sequential/stable_merge_sort.h", + "cuda/include/thrust/system/detail/sequential/stable_merge_sort.inl", + "cuda/include/thrust/system/detail/sequential/stable_primitive_sort.h", + "cuda/include/thrust/system/detail/sequential/stable_primitive_sort.inl", + "cuda/include/thrust/system/detail/sequential/stable_radix_sort.h", + "cuda/include/thrust/system/detail/sequential/stable_radix_sort.inl", + "cuda/include/thrust/system/detail/sequential/swap_ranges.h", + "cuda/include/thrust/system/detail/sequential/tabulate.h", + "cuda/include/thrust/system/detail/sequential/temporary_buffer.h", + "cuda/include/thrust/system/detail/sequential/transform.h", + 
"cuda/include/thrust/system/detail/sequential/transform_reduce.h", + "cuda/include/thrust/system/detail/sequential/transform_scan.h", + "cuda/include/thrust/system/detail/sequential/trivial_copy.h", + "cuda/include/thrust/system/detail/sequential/uninitialized_copy.h", + "cuda/include/thrust/system/detail/sequential/uninitialized_fill.h", + "cuda/include/thrust/system/detail/sequential/unique.h", + "cuda/include/thrust/system/detail/sequential/unique_by_key.h", "cuda/include/thrust/system/detail/system_error.inl", - "cuda/include/thrust/system/omp/execution_policy.h", - "cuda/include/thrust/system/omp/vector.h", - "cuda/include/thrust/system/omp/detail/transform_scan.h", - "cuda/include/thrust/system/omp/detail/memory.inl", - "cuda/include/thrust/system/omp/detail/reduce_intervals.inl", - "cuda/include/thrust/system/omp/detail/unique_by_key.h", - "cuda/include/thrust/system/omp/detail/sort.inl", - "cuda/include/thrust/system/omp/detail/partition.h", - "cuda/include/thrust/system/omp/detail/unique.h", - "cuda/include/thrust/system/omp/detail/execution_policy.h", + "cuda/include/thrust/system/error_code.h", "cuda/include/thrust/system/omp/detail/adjacent_difference.h", - "cuda/include/thrust/system/omp/detail/unique_by_key.inl", - "cuda/include/thrust/system/omp/detail/sequence.h", - "cuda/include/thrust/system/omp/detail/merge.h", - "cuda/include/thrust/system/omp/detail/unique.inl", + "cuda/include/thrust/system/omp/detail/assign_value.h", + "cuda/include/thrust/system/omp/detail/binary_search.h", + "cuda/include/thrust/system/omp/detail/copy.h", + "cuda/include/thrust/system/omp/detail/copy.inl", + "cuda/include/thrust/system/omp/detail/copy_if.h", "cuda/include/thrust/system/omp/detail/copy_if.inl", - "cuda/include/thrust/system/omp/detail/transform_reduce.h", - "cuda/include/thrust/system/omp/detail/gather.h", - "cuda/include/thrust/system/omp/detail/reduce_by_key.inl", - "cuda/include/thrust/system/omp/detail/sort.h", - 
"cuda/include/thrust/system/omp/detail/scan.h", - "cuda/include/thrust/system/omp/detail/temporary_buffer.h", + "cuda/include/thrust/system/omp/detail/count.h", "cuda/include/thrust/system/omp/detail/default_decomposition.h", - "cuda/include/thrust/system/omp/detail/reduce.inl", - "cuda/include/thrust/system/omp/detail/scan_by_key.h", - "cuda/include/thrust/system/omp/detail/reverse.h", - "cuda/include/thrust/system/omp/detail/assign_value.h", - "cuda/include/thrust/system/omp/detail/scatter.h", - "cuda/include/thrust/system/omp/detail/for_each.inl", "cuda/include/thrust/system/omp/detail/default_decomposition.inl", - "cuda/include/thrust/system/omp/detail/remove.inl", - "cuda/include/thrust/system/omp/detail/vector.inl", - "cuda/include/thrust/system/omp/detail/find.h", - "cuda/include/thrust/system/omp/detail/generate.h", - "cuda/include/thrust/system/omp/detail/uninitialized_fill.h", - "cuda/include/thrust/system/omp/detail/remove.h", - "cuda/include/thrust/system/omp/detail/tabulate.h", - "cuda/include/thrust/system/omp/detail/for_each.h", - "cuda/include/thrust/system/omp/detail/reduce_by_key.h", - "cuda/include/thrust/system/omp/detail/reduce.h", "cuda/include/thrust/system/omp/detail/equal.h", - "cuda/include/thrust/system/omp/detail/copy.inl", - "cuda/include/thrust/system/omp/detail/copy.h", - "cuda/include/thrust/system/omp/detail/swap_ranges.h", - "cuda/include/thrust/system/omp/detail/uninitialized_copy.h", - "cuda/include/thrust/system/omp/detail/binary_search.h", - "cuda/include/thrust/system/omp/detail/set_operations.h", - "cuda/include/thrust/system/omp/detail/mismatch.h", + "cuda/include/thrust/system/omp/detail/execution_policy.h", "cuda/include/thrust/system/omp/detail/extrema.h", - "cuda/include/thrust/system/omp/detail/count.h", - "cuda/include/thrust/system/omp/detail/replace.h", + "cuda/include/thrust/system/omp/detail/fill.h", + "cuda/include/thrust/system/omp/detail/find.h", + "cuda/include/thrust/system/omp/detail/for_each.h", + 
"cuda/include/thrust/system/omp/detail/for_each.inl", + "cuda/include/thrust/system/omp/detail/gather.h", + "cuda/include/thrust/system/omp/detail/generate.h", "cuda/include/thrust/system/omp/detail/get_value.h", "cuda/include/thrust/system/omp/detail/inner_product.h", - "cuda/include/thrust/system/omp/detail/copy_if.h", - "cuda/include/thrust/system/omp/detail/logical.h", - "cuda/include/thrust/system/omp/detail/partition.inl", "cuda/include/thrust/system/omp/detail/iter_swap.h", + "cuda/include/thrust/system/omp/detail/logical.h", + "cuda/include/thrust/system/omp/detail/malloc_and_free.h", + "cuda/include/thrust/system/omp/detail/memory.inl", + "cuda/include/thrust/system/omp/detail/merge.h", + "cuda/include/thrust/system/omp/detail/mismatch.h", "cuda/include/thrust/system/omp/detail/par.h", + "cuda/include/thrust/system/omp/detail/partition.h", + "cuda/include/thrust/system/omp/detail/partition.inl", + "cuda/include/thrust/system/omp/detail/reduce.h", + "cuda/include/thrust/system/omp/detail/reduce.inl", + "cuda/include/thrust/system/omp/detail/reduce_by_key.h", + "cuda/include/thrust/system/omp/detail/reduce_by_key.inl", "cuda/include/thrust/system/omp/detail/reduce_intervals.h", - "cuda/include/thrust/system/omp/detail/malloc_and_free.h", - "cuda/include/thrust/system/omp/detail/fill.h", + "cuda/include/thrust/system/omp/detail/reduce_intervals.inl", + "cuda/include/thrust/system/omp/detail/remove.h", + "cuda/include/thrust/system/omp/detail/remove.inl", + "cuda/include/thrust/system/omp/detail/replace.h", + "cuda/include/thrust/system/omp/detail/reverse.h", + "cuda/include/thrust/system/omp/detail/scan.h", + "cuda/include/thrust/system/omp/detail/scan_by_key.h", + "cuda/include/thrust/system/omp/detail/scatter.h", + "cuda/include/thrust/system/omp/detail/sequence.h", + "cuda/include/thrust/system/omp/detail/set_operations.h", + "cuda/include/thrust/system/omp/detail/sort.h", + "cuda/include/thrust/system/omp/detail/sort.inl", + 
"cuda/include/thrust/system/omp/detail/swap_ranges.h", + "cuda/include/thrust/system/omp/detail/tabulate.h", + "cuda/include/thrust/system/omp/detail/temporary_buffer.h", "cuda/include/thrust/system/omp/detail/transform.h", - "cuda/include/thrust/system/omp/memory.h", - "cuda/include/thrust/system/tbb/execution_policy.h", - "cuda/include/thrust/system/tbb/vector.h", - "cuda/include/thrust/system/tbb/detail/transform_scan.h", - "cuda/include/thrust/system/tbb/detail/memory.inl", - "cuda/include/thrust/system/tbb/detail/unique_by_key.h", - "cuda/include/thrust/system/tbb/detail/sort.inl", - "cuda/include/thrust/system/tbb/detail/partition.h", - "cuda/include/thrust/system/tbb/detail/unique.h", - "cuda/include/thrust/system/tbb/detail/execution_policy.h", + "cuda/include/thrust/system/omp/detail/transform_reduce.h", + "cuda/include/thrust/system/omp/detail/transform_scan.h", + "cuda/include/thrust/system/omp/detail/uninitialized_copy.h", + "cuda/include/thrust/system/omp/detail/uninitialized_fill.h", + "cuda/include/thrust/system/omp/detail/unique.h", + "cuda/include/thrust/system/omp/detail/unique.inl", + "cuda/include/thrust/system/omp/detail/unique_by_key.h", + "cuda/include/thrust/system/omp/detail/unique_by_key.inl", + "cuda/include/thrust/system/omp/detail/vector.inl", + "cuda/include/thrust/system/omp/execution_policy.h", + "cuda/include/thrust/system/omp/memory.h", + "cuda/include/thrust/system/omp/vector.h", + "cuda/include/thrust/system/system_error.h", "cuda/include/thrust/system/tbb/detail/adjacent_difference.h", - "cuda/include/thrust/system/tbb/detail/unique_by_key.inl", - "cuda/include/thrust/system/tbb/detail/sequence.h", - "cuda/include/thrust/system/tbb/detail/merge.h", - "cuda/include/thrust/system/tbb/detail/unique.inl", - "cuda/include/thrust/system/tbb/detail/copy_if.inl", - "cuda/include/thrust/system/tbb/detail/transform_reduce.h", - "cuda/include/thrust/system/tbb/detail/gather.h", - "cuda/include/thrust/system/tbb/detail/reduce_by_key.inl", - 
"cuda/include/thrust/system/tbb/detail/sort.h", - "cuda/include/thrust/system/tbb/detail/scan.h", - "cuda/include/thrust/system/tbb/detail/temporary_buffer.h", - "cuda/include/thrust/system/tbb/detail/reduce.inl", - "cuda/include/thrust/system/tbb/detail/scan_by_key.h", - "cuda/include/thrust/system/tbb/detail/reverse.h", "cuda/include/thrust/system/tbb/detail/assign_value.h", - "cuda/include/thrust/system/tbb/detail/scatter.h", - "cuda/include/thrust/system/tbb/detail/for_each.inl", - "cuda/include/thrust/system/tbb/detail/remove.inl", - "cuda/include/thrust/system/tbb/detail/vector.inl", - "cuda/include/thrust/system/tbb/detail/find.h", - "cuda/include/thrust/system/tbb/detail/merge.inl", - "cuda/include/thrust/system/tbb/detail/generate.h", - "cuda/include/thrust/system/tbb/detail/uninitialized_fill.h", - "cuda/include/thrust/system/tbb/detail/remove.h", - "cuda/include/thrust/system/tbb/detail/tabulate.h", - "cuda/include/thrust/system/tbb/detail/for_each.h", - "cuda/include/thrust/system/tbb/detail/reduce_by_key.h", - "cuda/include/thrust/system/tbb/detail/reduce.h", - "cuda/include/thrust/system/tbb/detail/equal.h", - "cuda/include/thrust/system/tbb/detail/copy.inl", - "cuda/include/thrust/system/tbb/detail/copy.h", - "cuda/include/thrust/system/tbb/detail/swap_ranges.h", - "cuda/include/thrust/system/tbb/detail/uninitialized_copy.h", "cuda/include/thrust/system/tbb/detail/binary_search.h", - "cuda/include/thrust/system/tbb/detail/set_operations.h", - "cuda/include/thrust/system/tbb/detail/mismatch.h", - "cuda/include/thrust/system/tbb/detail/scan.inl", - "cuda/include/thrust/system/tbb/detail/extrema.h", + "cuda/include/thrust/system/tbb/detail/copy.h", + "cuda/include/thrust/system/tbb/detail/copy.inl", + "cuda/include/thrust/system/tbb/detail/copy_if.h", + "cuda/include/thrust/system/tbb/detail/copy_if.inl", "cuda/include/thrust/system/tbb/detail/count.h", - "cuda/include/thrust/system/tbb/detail/replace.h", + 
"cuda/include/thrust/system/tbb/detail/equal.h", + "cuda/include/thrust/system/tbb/detail/execution_policy.h", + "cuda/include/thrust/system/tbb/detail/extrema.h", + "cuda/include/thrust/system/tbb/detail/fill.h", + "cuda/include/thrust/system/tbb/detail/find.h", + "cuda/include/thrust/system/tbb/detail/for_each.h", + "cuda/include/thrust/system/tbb/detail/for_each.inl", + "cuda/include/thrust/system/tbb/detail/gather.h", + "cuda/include/thrust/system/tbb/detail/generate.h", "cuda/include/thrust/system/tbb/detail/get_value.h", "cuda/include/thrust/system/tbb/detail/inner_product.h", - "cuda/include/thrust/system/tbb/detail/copy_if.h", - "cuda/include/thrust/system/tbb/detail/logical.h", - "cuda/include/thrust/system/tbb/detail/partition.inl", "cuda/include/thrust/system/tbb/detail/iter_swap.h", + "cuda/include/thrust/system/tbb/detail/logical.h", + "cuda/include/thrust/system/tbb/detail/malloc_and_free.h", + "cuda/include/thrust/system/tbb/detail/memory.inl", + "cuda/include/thrust/system/tbb/detail/merge.h", + "cuda/include/thrust/system/tbb/detail/merge.inl", + "cuda/include/thrust/system/tbb/detail/mismatch.h", "cuda/include/thrust/system/tbb/detail/par.h", + "cuda/include/thrust/system/tbb/detail/partition.h", + "cuda/include/thrust/system/tbb/detail/partition.inl", + "cuda/include/thrust/system/tbb/detail/reduce.h", + "cuda/include/thrust/system/tbb/detail/reduce.inl", + "cuda/include/thrust/system/tbb/detail/reduce_by_key.h", + "cuda/include/thrust/system/tbb/detail/reduce_by_key.inl", "cuda/include/thrust/system/tbb/detail/reduce_intervals.h", - "cuda/include/thrust/system/tbb/detail/malloc_and_free.h", - "cuda/include/thrust/system/tbb/detail/fill.h", + "cuda/include/thrust/system/tbb/detail/remove.h", + "cuda/include/thrust/system/tbb/detail/remove.inl", + "cuda/include/thrust/system/tbb/detail/replace.h", + "cuda/include/thrust/system/tbb/detail/reverse.h", + "cuda/include/thrust/system/tbb/detail/scan.h", + 
"cuda/include/thrust/system/tbb/detail/scan.inl", + "cuda/include/thrust/system/tbb/detail/scan_by_key.h", + "cuda/include/thrust/system/tbb/detail/scatter.h", + "cuda/include/thrust/system/tbb/detail/sequence.h", + "cuda/include/thrust/system/tbb/detail/set_operations.h", + "cuda/include/thrust/system/tbb/detail/sort.h", + "cuda/include/thrust/system/tbb/detail/sort.inl", + "cuda/include/thrust/system/tbb/detail/swap_ranges.h", + "cuda/include/thrust/system/tbb/detail/tabulate.h", + "cuda/include/thrust/system/tbb/detail/temporary_buffer.h", "cuda/include/thrust/system/tbb/detail/transform.h", - "cuda/include/thrust/system/tbb/memory.h", - "cuda/include/thrust/system/error_code.h", - "cuda/include/thrust/system/cpp/execution_policy.h", - "cuda/include/thrust/system/cpp/vector.h", - "cuda/include/thrust/system/cpp/detail/transform_scan.h", - "cuda/include/thrust/system/cpp/detail/memory.inl", - "cuda/include/thrust/system/cpp/detail/unique_by_key.h", - "cuda/include/thrust/system/cpp/detail/partition.h", - "cuda/include/thrust/system/cpp/detail/unique.h", - "cuda/include/thrust/system/cpp/detail/execution_policy.h", - "cuda/include/thrust/system/cpp/detail/adjacent_difference.h", - "cuda/include/thrust/system/cpp/detail/sequence.h", - "cuda/include/thrust/system/cpp/detail/merge.h", - "cuda/include/thrust/system/cpp/detail/transform_reduce.h", - "cuda/include/thrust/system/cpp/detail/gather.h", - "cuda/include/thrust/system/cpp/detail/sort.h", - "cuda/include/thrust/system/cpp/detail/scan.h", - "cuda/include/thrust/system/cpp/detail/temporary_buffer.h", - "cuda/include/thrust/system/cpp/detail/scan_by_key.h", - "cuda/include/thrust/system/cpp/detail/reverse.h", - "cuda/include/thrust/system/cpp/detail/assign_value.h", - "cuda/include/thrust/system/cpp/detail/scatter.h", - "cuda/include/thrust/system/cpp/detail/vector.inl", - "cuda/include/thrust/system/cpp/detail/find.h", - "cuda/include/thrust/system/cpp/detail/generate.h", - 
"cuda/include/thrust/system/cpp/detail/uninitialized_fill.h", - "cuda/include/thrust/system/cpp/detail/remove.h", - "cuda/include/thrust/system/cpp/detail/tabulate.h", - "cuda/include/thrust/system/cpp/detail/for_each.h", - "cuda/include/thrust/system/cpp/detail/reduce_by_key.h", - "cuda/include/thrust/system/cpp/detail/reduce.h", - "cuda/include/thrust/system/cpp/detail/equal.h", - "cuda/include/thrust/system/cpp/detail/copy.h", - "cuda/include/thrust/system/cpp/detail/swap_ranges.h", - "cuda/include/thrust/system/cpp/detail/uninitialized_copy.h", - "cuda/include/thrust/system/cpp/detail/binary_search.h", - "cuda/include/thrust/system/cpp/detail/set_operations.h", - "cuda/include/thrust/system/cpp/detail/mismatch.h", - "cuda/include/thrust/system/cpp/detail/extrema.h", - "cuda/include/thrust/system/cpp/detail/count.h", - "cuda/include/thrust/system/cpp/detail/replace.h", - "cuda/include/thrust/system/cpp/detail/get_value.h", - "cuda/include/thrust/system/cpp/detail/inner_product.h", - "cuda/include/thrust/system/cpp/detail/copy_if.h", - "cuda/include/thrust/system/cpp/detail/logical.h", - "cuda/include/thrust/system/cpp/detail/iter_swap.h", - "cuda/include/thrust/system/cpp/detail/par.h", - "cuda/include/thrust/system/cpp/detail/malloc_and_free.h", - "cuda/include/thrust/system/cpp/detail/fill.h", - "cuda/include/thrust/system/cpp/detail/transform.h", - "cuda/include/thrust/system/cpp/memory.h", - "cuda/include/thrust/system/cuda/execution_policy.h", - "cuda/include/thrust/system/cuda/vector.h", - "cuda/include/thrust/system/cuda/error.h", - "cuda/include/thrust/system/cuda/detail/copy_device_to_device.h", - "cuda/include/thrust/system/cuda/detail/transform_scan.h", - "cuda/include/thrust/system/cuda/detail/memory.inl", - "cuda/include/thrust/system/cuda/detail/cub/util_allocator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh", - "cuda/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh", - 
"cuda/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh", - "cuda/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_device.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_partition.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_rle_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_histogram_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_by_key_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_scan_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_select_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_radix_sort_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_scan.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_select.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_histo.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_scan.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_downsweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_upsweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_satomic.cuh", - 
"cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_sort.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_gatomic.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_select.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_scan_prefix_operators.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce_by_key.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_macro.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_namespace.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_upsweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_histogram_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_rle_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_select_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_satomic_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_sort_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_gatomic_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_downsweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_by_key_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_prefix_operators.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_type.cuh", - "cuda/include/thrust/system/cuda/detail/cub/host/spinlock.cuh", - "cuda/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh", - "cuda/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh", - 
"cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh", - "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh", - "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh", - "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_ptx.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_debug.cuh", - "cuda/include/thrust/system/cuda/detail/cub/cub.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_scan.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_load.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_shift.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_store.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh", - 
"cuda/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh", - "cuda/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh", - "cuda/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh", - "cuda/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh", - "cuda/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh", - "cuda/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_arch.cuh", - "cuda/include/thrust/system/cuda/detail/reduce_intervals.inl", - "cuda/include/thrust/system/cuda/detail/copy_cross_system.inl", - "cuda/include/thrust/system/cuda/detail/unique_by_key.h", - "cuda/include/thrust/system/cuda/detail/bulk.h", - "cuda/include/thrust/system/cuda/detail/sort.inl", - "cuda/include/thrust/system/cuda/detail/partition.h", - "cuda/include/thrust/system/cuda/detail/unique.h", - "cuda/include/thrust/system/cuda/detail/execution_policy.h", - "cuda/include/thrust/system/cuda/detail/cuda_launch_config.h", - "cuda/include/thrust/system/cuda/detail/cub.h", - "cuda/include/thrust/system/cuda/detail/adjacent_difference.h", - "cuda/include/thrust/system/cuda/detail/sequence.h", - "cuda/include/thrust/system/cuda/detail/merge.h", - 
"cuda/include/thrust/system/cuda/detail/set_symmetric_difference.inl", - "cuda/include/thrust/system/cuda/detail/copy_if.inl", - "cuda/include/thrust/system/cuda/detail/transform_reduce.h", - "cuda/include/thrust/system/cuda/detail/error.inl", - "cuda/include/thrust/system/cuda/detail/gather.h", - "cuda/include/thrust/system/cuda/detail/reduce_by_key.inl", - "cuda/include/thrust/system/cuda/detail/sort.h", - "cuda/include/thrust/system/cuda/detail/synchronize.h", - "cuda/include/thrust/system/cuda/detail/scan.h", - "cuda/include/thrust/system/cuda/detail/temporary_indirect_permutation.h", - "cuda/include/thrust/system/cuda/detail/extern_shared_ptr.h", - "cuda/include/thrust/system/cuda/detail/detail/set_operation.inl", - "cuda/include/thrust/system/cuda/detail/detail/balanced_path.h", - "cuda/include/thrust/system/cuda/detail/detail/virtualized_smem_closure.h", - "cuda/include/thrust/system/cuda/detail/detail/stable_primitive_sort.h", - "cuda/include/thrust/system/cuda/detail/detail/set_operation.h", - "cuda/include/thrust/system/cuda/detail/detail/stable_primitive_sort.inl", - "cuda/include/thrust/system/cuda/detail/detail/stable_merge_sort.h", - "cuda/include/thrust/system/cuda/detail/detail/launch_closure.inl", - "cuda/include/thrust/system/cuda/detail/detail/merge.h", - "cuda/include/thrust/system/cuda/detail/detail/alignment.h", - "cuda/include/thrust/system/cuda/detail/detail/stable_radix_sort.inl", - "cuda/include/thrust/system/cuda/detail/detail/stable_sort_each.h", - "cuda/include/thrust/system/cuda/detail/detail/launch_calculator.inl", - "cuda/include/thrust/system/cuda/detail/detail/stable_merge_sort.inl", - "cuda/include/thrust/system/cuda/detail/detail/launch_closure.h", - "cuda/include/thrust/system/cuda/detail/detail/stable_radix_sort.h", - "cuda/include/thrust/system/cuda/detail/detail/uninitialized.h", - "cuda/include/thrust/system/cuda/detail/detail/cached_temporary_allocator.h", - 
"cuda/include/thrust/system/cuda/detail/detail/launch_calculator.h", - "cuda/include/thrust/system/cuda/detail/detail/stable_sort_each.inl", - "cuda/include/thrust/system/cuda/detail/temporary_buffer.h", - "cuda/include/thrust/system/cuda/detail/default_decomposition.h", - "cuda/include/thrust/system/cuda/detail/reduce.inl", - "cuda/include/thrust/system/cuda/detail/scan_by_key.h", - "cuda/include/thrust/system/cuda/detail/reverse.h", - "cuda/include/thrust/system/cuda/detail/assign_value.h", - "cuda/include/thrust/system/cuda/detail/scatter.h", - "cuda/include/thrust/system/cuda/detail/reduce_intervals.hpp", - "cuda/include/thrust/system/cuda/detail/for_each.inl", - "cuda/include/thrust/system/cuda/detail/default_decomposition.inl", - "cuda/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h", - "cuda/include/thrust/system/cuda/detail/adjacent_difference.inl", - "cuda/include/thrust/system/cuda/detail/vector.inl", - "cuda/include/thrust/system/cuda/detail/throw_on_error.h", - "cuda/include/thrust/system/cuda/detail/find.h", - "cuda/include/thrust/system/cuda/detail/terminate.h", - "cuda/include/thrust/system/cuda/detail/merge.inl", - "cuda/include/thrust/system/cuda/detail/trivial_copy.inl", - "cuda/include/thrust/system/cuda/detail/generate.h", - "cuda/include/thrust/system/cuda/detail/execute_on_stream.h", - "cuda/include/thrust/system/cuda/detail/uninitialized_fill.h", - "cuda/include/thrust/system/cuda/detail/remove.h", - "cuda/include/thrust/system/cuda/detail/tabulate.h", - "cuda/include/thrust/system/cuda/detail/for_each.h", - "cuda/include/thrust/system/cuda/detail/reduce_by_key.h", - "cuda/include/thrust/system/cuda/detail/decomposition.h", - "cuda/include/thrust/system/cuda/detail/reduce.h", - "cuda/include/thrust/system/cuda/detail/equal.h", - "cuda/include/thrust/system/cuda/detail/runtime_introspection.h", - "cuda/include/thrust/system/cuda/detail/copy.inl", - "cuda/include/thrust/system/cuda/detail/copy.h", - 
"cuda/include/thrust/system/cuda/detail/swap_ranges.h", - "cuda/include/thrust/system/cuda/detail/uninitialized_copy.h", - "cuda/include/thrust/system/cuda/detail/binary_search.h", - "cuda/include/thrust/system/cuda/detail/runtime_introspection.inl", - "cuda/include/thrust/system/cuda/detail/set_operations.h", - "cuda/include/thrust/system/cuda/detail/mismatch.h", - "cuda/include/thrust/system/cuda/detail/scan.inl", - "cuda/include/thrust/system/cuda/detail/synchronize.inl", - "cuda/include/thrust/system/cuda/detail/extrema.h", - "cuda/include/thrust/system/cuda/detail/set_union.inl", - "cuda/include/thrust/system/cuda/detail/set_intersection.inl", - "cuda/include/thrust/system/cuda/detail/count.h", - "cuda/include/thrust/system/cuda/detail/trivial_copy.h", - "cuda/include/thrust/system/cuda/detail/copy_device_to_device.inl", - "cuda/include/thrust/system/cuda/detail/replace.h", - "cuda/include/thrust/system/cuda/detail/bulk/malloc.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/config.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/closure.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/tail_flags.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/terminate.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/alignment.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/guarded_cuda_runtime_api.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/choose_sizes.inl", - "cuda/include/thrust/system/cuda/detail/bulk/detail/tuple_meta_transform.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_task.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/head_flags.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/synchronize.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/throw_on_error.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/parameter_ptr.hpp", - 
"cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launcher.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/triple_chevron_launcher.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.inl", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launch_config.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/async.inl", - "cuda/include/thrust/system/cuda/detail/bulk/detail/tuple_transform.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/pointer_traits.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/apply_from_tuple.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/is_contiguous_iterator.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/iterator.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/choose_sizes.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/copy.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/merge.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/accumulate.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/scan.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/detail/stable_merge_sort.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/gather.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/sort.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/reduce.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/scatter.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/adjacent_difference.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/reduce_by_key.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/for_each.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/bulk.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/execution_policy.hpp", - 
"cuda/include/thrust/system/cuda/detail/bulk/iterator/strided_iterator.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/uninitialized.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/async.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/future.hpp", - "cuda/include/thrust/system/cuda/detail/guarded_driver_types.h", - "cuda/include/thrust/system/cuda/detail/get_value.h", - "cuda/include/thrust/system/cuda/detail/inner_product.h", - "cuda/include/thrust/system/cuda/detail/copy_if.h", - "cuda/include/thrust/system/cuda/detail/logical.h", - "cuda/include/thrust/system/cuda/detail/iter_swap.h", - "cuda/include/thrust/system/cuda/detail/block/merge.h", - "cuda/include/thrust/system/cuda/detail/block/inclusive_scan.h", - "cuda/include/thrust/system/cuda/detail/block/merge.inl", - "cuda/include/thrust/system/cuda/detail/block/merging_sort.h", - "cuda/include/thrust/system/cuda/detail/block/exclusive_scan.h", - "cuda/include/thrust/system/cuda/detail/block/reduce.h", - "cuda/include/thrust/system/cuda/detail/block/copy.h", - "cuda/include/thrust/system/cuda/detail/block/odd_even_sort.h", - "cuda/include/thrust/system/cuda/detail/par.h", - "cuda/include/thrust/system/cuda/detail/copy_cross_system.h", - "cuda/include/thrust/system/cuda/detail/reduce_intervals.h", - "cuda/include/thrust/system/cuda/detail/malloc_and_free.h", - "cuda/include/thrust/system/cuda/detail/fill.h", - "cuda/include/thrust/system/cuda/detail/set_difference.inl", - "cuda/include/thrust/system/cuda/detail/transform.h", - "cuda/include/thrust/system/cuda/experimental/pinned_allocator.h", - "cuda/include/thrust/system/cuda/memory.h", - "cuda/include/thrust/remove.h", + "cuda/include/thrust/system/tbb/detail/transform_reduce.h", + "cuda/include/thrust/system/tbb/detail/transform_scan.h", + "cuda/include/thrust/system/tbb/detail/uninitialized_copy.h", + "cuda/include/thrust/system/tbb/detail/uninitialized_fill.h", + "cuda/include/thrust/system/tbb/detail/unique.h", + 
"cuda/include/thrust/system/tbb/detail/unique.inl", + "cuda/include/thrust/system/tbb/detail/unique_by_key.h", + "cuda/include/thrust/system/tbb/detail/unique_by_key.inl", + "cuda/include/thrust/system/tbb/detail/vector.inl", + "cuda/include/thrust/system/tbb/execution_policy.h", + "cuda/include/thrust/system/tbb/memory.h", + "cuda/include/thrust/system/tbb/vector.h", + "cuda/include/thrust/system_error.h", "cuda/include/thrust/tabulate.h", - "cuda/include/thrust/for_each.h", - "cuda/include/thrust/distance.h", - "cuda/include/thrust/reduce.h", - "cuda/include/thrust/equal.h", - "cuda/include/thrust/complex.h", - "cuda/include/thrust/device_allocator.h", - "cuda/include/thrust/copy.h", + "cuda/include/thrust/transform.h", + "cuda/include/thrust/transform_reduce.h", + "cuda/include/thrust/transform_scan.h", + "cuda/include/thrust/tuple.h", "cuda/include/thrust/uninitialized_copy.h", - "cuda/include/thrust/device_reference.h", - "cuda/include/thrust/binary_search.h", - "cuda/include/thrust/set_operations.h", - "cuda/include/thrust/swap.h", - "cuda/include/thrust/mismatch.h", - "cuda/include/thrust/extrema.h", - "cuda/include/thrust/count.h", - "cuda/include/thrust/device_free.h", - "cuda/include/thrust/random/discard_block_engine.h", - "cuda/include/thrust/random/normal_distribution.h", - "cuda/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h", - "cuda/include/thrust/random/detail/subtract_with_carry_engine.inl", - "cuda/include/thrust/random/detail/xor_combine_engine_max.h", - "cuda/include/thrust/random/detail/linear_congruential_engine_discard.h", - "cuda/include/thrust/random/detail/uniform_int_distribution.inl", - "cuda/include/thrust/random/detail/discard_block_engine.inl", - "cuda/include/thrust/random/detail/uniform_real_distribution.inl", - "cuda/include/thrust/random/detail/random_core_access.h", - "cuda/include/thrust/random/detail/mod.h", - "cuda/include/thrust/random/detail/linear_feedback_shift_engine.inl", - 
"cuda/include/thrust/random/detail/linear_congruential_engine.inl", - "cuda/include/thrust/random/detail/xor_combine_engine.inl", - "cuda/include/thrust/random/detail/normal_distribution.inl", - "cuda/include/thrust/random/detail/normal_distribution_base.h", - "cuda/include/thrust/random/uniform_int_distribution.h", - "cuda/include/thrust/random/linear_feedback_shift_engine.h", - "cuda/include/thrust/random/xor_combine_engine.h", - "cuda/include/thrust/random/subtract_with_carry_engine.h", - "cuda/include/thrust/random/linear_congruential_engine.h", - "cuda/include/thrust/random/uniform_real_distribution.h", - "cuda/include/thrust/functional.h", - "cuda/include/thrust/replace.h", - "cuda/include/thrust/device_new_allocator.h", - "cuda/include/thrust/host_vector.h", + "cuda/include/thrust/uninitialized_fill.h", + "cuda/include/thrust/unique.h", "cuda/include/thrust/version.h", - "cuda/include/thrust/inner_product.h", - "cuda/include/thrust/iterator/iterator_traits.h", - "cuda/include/thrust/iterator/discard_iterator.h", - "cuda/include/thrust/iterator/retag.h", - "cuda/include/thrust/iterator/permutation_iterator.h", - "cuda/include/thrust/iterator/transform_iterator.h", - "cuda/include/thrust/iterator/detail/reverse_iterator.inl", - "cuda/include/thrust/iterator/detail/zip_iterator.inl", - "cuda/include/thrust/iterator/detail/counting_iterator.inl", - "cuda/include/thrust/iterator/detail/distance_from_result.h", - "cuda/include/thrust/iterator/detail/host_system_tag.h", - "cuda/include/thrust/iterator/detail/iterator_traversal_tags.h", - "cuda/include/thrust/iterator/detail/retag.h", - "cuda/include/thrust/iterator/detail/tagged_iterator.h", - "cuda/include/thrust/iterator/detail/iterator_traits.inl", - "cuda/include/thrust/iterator/detail/minimum_category.h", - "cuda/include/thrust/iterator/detail/discard_iterator_base.h", - "cuda/include/thrust/iterator/detail/iterator_category_to_traversal.h", - "cuda/include/thrust/iterator/detail/zip_iterator_base.h", - 
"cuda/include/thrust/iterator/detail/normal_iterator.h", - "cuda/include/thrust/iterator/detail/join_iterator.h", - "cuda/include/thrust/iterator/detail/device_system_tag.h", - "cuda/include/thrust/iterator/detail/universal_categories.h", - "cuda/include/thrust/iterator/detail/reverse_iterator_base.h", - "cuda/include/thrust/iterator/detail/minimum_system.h", - "cuda/include/thrust/iterator/detail/tuple_of_iterator_references.h", - "cuda/include/thrust/iterator/detail/is_iterator_category.h", - "cuda/include/thrust/iterator/detail/permutation_iterator_base.h", - "cuda/include/thrust/iterator/detail/any_assign.h", - "cuda/include/thrust/iterator/detail/any_system_tag.h", - "cuda/include/thrust/iterator/detail/is_trivial_iterator.h", - "cuda/include/thrust/iterator/detail/iterator_category_to_system.h", - "cuda/include/thrust/iterator/detail/iterator_adaptor_base.h", - "cuda/include/thrust/iterator/detail/constant_iterator_base.h", - "cuda/include/thrust/iterator/detail/transform_iterator.inl", - "cuda/include/thrust/iterator/detail/iterator_facade_category.h", - "cuda/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h", - "cuda/include/thrust/iterator/constant_iterator.h", - "cuda/include/thrust/iterator/counting_iterator.h", - "cuda/include/thrust/iterator/iterator_adaptor.h", - "cuda/include/thrust/iterator/iterator_facade.h", - "cuda/include/thrust/iterator/iterator_categories.h", - "cuda/include/thrust/iterator/reverse_iterator.h", - "cuda/include/thrust/iterator/zip_iterator.h", - "cuda/include/thrust/logical.h", - "cuda/include/thrust/tuple.h", - "cuda/include/thrust/memory.h", - "cuda/include/thrust/random.h", - "cuda/include/thrust/fill.h", - "cuda/include/thrust/transform.h", - "cuda/include/texture_types.h", - "cuda/include/nppversion.h", - "cuda/include/cuda_texture_types.h", - "cuda/include/fatbinary.h", - "cuda/include/cublasXt.h", - "cuda/include/cuda_fp16.h", "cuda/include/vector_functions.h", - "cuda/include/cusparse.h", - 
"cuda/include/nppi_filtering_functions.h", - "cuda/include/nppi_morphological_operations.h", - "cuda/include/sobol_direction_vectors.h", - "cuda/include/nvblas.h", - "cuda/include/curand_mtgp32dc_p_11213.h", - "cuda/include/nvcuvid.h", - "cuda/include/cuda_runtime_api.h", - "cuda/include/curand_mtgp32_kernel.h", - "cuda/include/cublas_v2.h", - "cuda/include/builtin_types.h", - "cuda/include/nppi_geometry_transforms.h", - "cuda/include/npps_support_functions.h", - "cuda/include/cufftw.h", - "cuda/include/cuda_device_runtime_api.h", - "cuda/include/sm_30_intrinsics.hpp", + "cuda/include/vector_functions.hpp", "cuda/include/vector_types.h", - "cuda/include/sm_35_atomic_functions.h", - "cuda/include/sm_20_intrinsics.h", - "cuda/include/driver_types.h", - "cuda/include/nvToolsExtCudaRt.h", - "cuda/include/curand_globals.h", - "cuda/include/device_atomic_functions.h", - "cuda/include/surface_types.h", - "cuda/include/nvrtc.h", - "cuda/include/nppdefs.h", - "cuda/include/sm_60_atomic_functions.h", - "cuda/include/driver_functions.h", - "cuda/include/cusolver_common.h", - "cuda/include/cublas.h", - "cuda/include/curand_lognormal.h", - "cuda/include/device_atomic_functions.hpp", - "cuda/include/crt/device_runtime.h", - "cuda/include/crt/storage_class.h", - "cuda/include/crt/func_macro.h", - "cuda/include/crt/host_runtime.h", - "cuda/include/nppi_arithmetic_and_logical_operations.h", - "cuda/include/npps_arithmetic_and_logical_operations.h", - "cuda/include/nppi_computer_vision.h", - "cuda/include/surface_functions.hpp", - "cuda/include/surface_functions.h", - "cuda/include/curand_normal_static.h", - "cuda/include/curand.h", - "cuda/include/math_functions_dbl_ptx3.h", - "cuda/include/curand_philox4x32_x.h", - "cuda/include/nppi_threshold_and_compare_operations.h", - "cuda/include/nvml.h", - "cuda/include/npps.h", - "cuda/include/cuda_vdpau_interop.h", - "cuda/include/sm_61_intrinsics.hpp", - "cuda/include/cublas_api.h", - "cuda/include/nppi_color_conversion.h", - 
"cuda/include/math_functions_dbl_ptx3.hpp", - "cuda/include/nppcore.h", - "cuda/include/cudaGL.h", - "cuda/include/fatBinaryCtl.h", - "cuda/include/npps_statistics_functions.h", - "cuda/include/cudaVDPAU.h", - "cuda/include/curand_poisson.h", - "cuda/include/cusolverDn.h", - "cuda/include/cuda_profiler_api.h", - "cuda/include/sm_20_atomic_functions.h", - "cuda/include/nvfunctional", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-8.0/include/math_functions.hpp" "$(@D)/cuda/include/math_functions.hpp" && cp "/usr/local/cuda-8.0/include/cufft.h" "$(@D)/cuda/include/cufft.h" && cp "/usr/local/cuda-8.0/include/nvgraph.h" "$(@D)/cuda/include/nvgraph.h" && cp "/usr/local/cuda-8.0/include/curand_normal.h" "$(@D)/cuda/include/curand_normal.h" && cp "/usr/local/cuda-8.0/include/curand_uniform.h" "$(@D)/cuda/include/curand_uniform.h" && cp "/usr/local/cuda-8.0/include/nppi_data_exchange_and_initialization.h" "$(@D)/cuda/include/nppi_data_exchange_and_initialization.h" && cp "/usr/local/cuda-8.0/include/cuda_gl_interop.h" "$(@D)/cuda/include/cuda_gl_interop.h" && cp "/usr/local/cuda-8.0/include/nppi_compression_functions.h" "$(@D)/cuda/include/nppi_compression_functions.h" && cp "/usr/local/cuda-8.0/include/npp.h" "$(@D)/cuda/include/npp.h" && cp "/usr/local/cuda-8.0/include/cuda.h" "$(@D)/cuda/include/cuda.h" && cp "/usr/local/cuda-8.0/include/nppi_statistics_functions.h" "$(@D)/cuda/include/nppi_statistics_functions.h" && cp "/usr/local/cuda-8.0/include/vector_functions.hpp" "$(@D)/cuda/include/vector_functions.hpp" && cp "/usr/local/cuda-8.0/include/sm_32_intrinsics.hpp" "$(@D)/cuda/include/sm_32_intrinsics.hpp" && cp "/usr/local/cuda-8.0/include/sm_32_intrinsics.h" "$(@D)/cuda/include/sm_32_intrinsics.h" && cp "/usr/local/cuda-8.0/include/curand_discrete.h" 
"$(@D)/cuda/include/curand_discrete.h" && cp "/usr/local/cuda-8.0/include/cuda_runtime.h" "$(@D)/cuda/include/cuda_runtime.h" && cp "/usr/local/cuda-8.0/include/cufftXt.h" "$(@D)/cuda/include/cufftXt.h" && cp "/usr/local/cuda-8.0/include/sm_61_intrinsics.h" "$(@D)/cuda/include/sm_61_intrinsics.h" && cp "/usr/local/cuda-8.0/include/texture_fetch_functions.h" "$(@D)/cuda/include/texture_fetch_functions.h" && cp "/usr/local/cuda-8.0/include/curand_mrg32k3a.h" "$(@D)/cuda/include/curand_mrg32k3a.h" && cp "/usr/local/cuda-8.0/include/host_defines.h" "$(@D)/cuda/include/host_defines.h" && cp "/usr/local/cuda-8.0/include/common_functions.h" "$(@D)/cuda/include/common_functions.h" && cp "/usr/local/cuda-8.0/include/nppi_support_functions.h" "$(@D)/cuda/include/nppi_support_functions.h" && cp "/usr/local/cuda-8.0/include/nppi_linear_transforms.h" "$(@D)/cuda/include/nppi_linear_transforms.h" && cp "/usr/local/cuda-8.0/include/device_double_functions.hpp" "$(@D)/cuda/include/device_double_functions.hpp" && cp "/usr/local/cuda-8.0/include/math_constants.h" "$(@D)/cuda/include/math_constants.h" && cp "/usr/local/cuda-8.0/include/nvToolsExtSync.h" "$(@D)/cuda/include/nvToolsExtSync.h" && cp "/usr/local/cuda-8.0/include/npps_initialization.h" "$(@D)/cuda/include/npps_initialization.h" && cp "/usr/local/cuda-8.0/include/cusolverSp_LOWLEVEL_PREVIEW.h" "$(@D)/cuda/include/cusolverSp_LOWLEVEL_PREVIEW.h" && cp "/usr/local/cuda-8.0/include/texture_indirect_functions.hpp" "$(@D)/cuda/include/texture_indirect_functions.hpp" && cp "/usr/local/cuda-8.0/include/cudaProfiler.h" "$(@D)/cuda/include/cudaProfiler.h" && cp "/usr/local/cuda-8.0/include/npps_filtering_functions.h" "$(@D)/cuda/include/npps_filtering_functions.h" && cp "/usr/local/cuda-8.0/include/cusparse_v2.h" "$(@D)/cuda/include/cusparse_v2.h" && cp "/usr/local/cuda-8.0/include/nppi.h" "$(@D)/cuda/include/nppi.h" && cp "/usr/local/cuda-8.0/include/surface_indirect_functions.h" "$(@D)/cuda/include/surface_indirect_functions.h" && 
cp "/usr/local/cuda-8.0/include/sm_30_intrinsics.h" "$(@D)/cuda/include/sm_30_intrinsics.h" && cp "/usr/local/cuda-8.0/include/device_double_functions.h" "$(@D)/cuda/include/device_double_functions.h" && cp "/usr/local/cuda-8.0/include/sm_35_intrinsics.h" "$(@D)/cuda/include/sm_35_intrinsics.h" && cp "/usr/local/cuda-8.0/include/cusolverSp.h" "$(@D)/cuda/include/cusolverSp.h" && cp "/usr/local/cuda-8.0/include/library_types.h" "$(@D)/cuda/include/library_types.h" && cp "/usr/local/cuda-8.0/include/surface_indirect_functions.hpp" "$(@D)/cuda/include/surface_indirect_functions.hpp" && cp "/usr/local/cuda-8.0/include/cudalibxt.h" "$(@D)/cuda/include/cudalibxt.h" && cp "/usr/local/cuda-8.0/include/channel_descriptor.h" "$(@D)/cuda/include/channel_descriptor.h" && cp "/usr/local/cuda-8.0/include/device_functions_decls.h" "$(@D)/cuda/include/device_functions_decls.h" && cp "/usr/local/cuda-8.0/include/curand_kernel.h" "$(@D)/cuda/include/curand_kernel.h" && cp "/usr/local/cuda-8.0/include/curand_mtgp32_host.h" "$(@D)/cuda/include/curand_mtgp32_host.h" && cp "/usr/local/cuda-8.0/include/nvToolsExtCuda.h" "$(@D)/cuda/include/nvToolsExtCuda.h" && cp "/usr/local/cuda-8.0/include/nvToolsExt.h" "$(@D)/cuda/include/nvToolsExt.h" && cp "/usr/local/cuda-8.0/include/cuComplex.h" "$(@D)/cuda/include/cuComplex.h" && cp "/usr/local/cuda-8.0/include/sm_32_atomic_functions.h" "$(@D)/cuda/include/sm_32_atomic_functions.h" && cp "/usr/local/cuda-8.0/include/texture_indirect_functions.h" "$(@D)/cuda/include/texture_indirect_functions.h" && cp "/usr/local/cuda-8.0/include/sm_32_atomic_functions.hpp" "$(@D)/cuda/include/sm_32_atomic_functions.hpp" && cp "/usr/local/cuda-8.0/include/sm_20_intrinsics.hpp" "$(@D)/cuda/include/sm_20_intrinsics.hpp" && cp "/usr/local/cuda-8.0/include/device_launch_parameters.h" "$(@D)/cuda/include/device_launch_parameters.h" && cp "/usr/local/cuda-8.0/include/curand_mtgp32.h" "$(@D)/cuda/include/curand_mtgp32.h" && cp 
"/usr/local/cuda-8.0/include/texture_fetch_functions.hpp" "$(@D)/cuda/include/texture_fetch_functions.hpp" && cp "/usr/local/cuda-8.0/include/cuda_occupancy.h" "$(@D)/cuda/include/cuda_occupancy.h" && cp "/usr/local/cuda-8.0/include/CL/opencl.h" "$(@D)/cuda/include/CL/opencl.h" && cp "/usr/local/cuda-8.0/include/CL/cl_platform.h" "$(@D)/cuda/include/CL/cl_platform.h" && cp "/usr/local/cuda-8.0/include/CL/cl_egl.h" "$(@D)/cuda/include/CL/cl_egl.h" && cp "/usr/local/cuda-8.0/include/CL/cl_gl.h" "$(@D)/cuda/include/CL/cl_gl.h" && cp "/usr/local/cuda-8.0/include/CL/cl.h" "$(@D)/cuda/include/CL/cl.h" && cp "/usr/local/cuda-8.0/include/CL/cl_gl_ext.h" "$(@D)/cuda/include/CL/cl_gl_ext.h" && cp "/usr/local/cuda-8.0/include/CL/cl_ext.h" "$(@D)/cuda/include/CL/cl_ext.h" && cp "/usr/local/cuda-8.0/include/CL/cl.hpp" "$(@D)/cuda/include/CL/cl.hpp" && cp "/usr/local/cuda-8.0/include/host_config.h" "$(@D)/cuda/include/host_config.h" && cp "/usr/local/cuda-8.0/include/cuda_surface_types.h" "$(@D)/cuda/include/cuda_surface_types.h" && cp "/usr/local/cuda-8.0/include/math_functions.h" "$(@D)/cuda/include/math_functions.h" && cp "/usr/local/cuda-8.0/include/nvToolsExtMeta.h" "$(@D)/cuda/include/nvToolsExtMeta.h" && cp "/usr/local/cuda-8.0/include/sm_20_atomic_functions.hpp" "$(@D)/cuda/include/sm_20_atomic_functions.hpp" && cp "/usr/local/cuda-8.0/include/device_functions.h" "$(@D)/cuda/include/device_functions.h" && cp "/usr/local/cuda-8.0/include/device_types.h" "$(@D)/cuda/include/device_types.h" && cp "/usr/local/cuda-8.0/include/npps_conversion_functions.h" "$(@D)/cuda/include/npps_conversion_functions.h" && cp "/usr/local/cuda-8.0/include/curand_precalc.h" "$(@D)/cuda/include/curand_precalc.h" && cp "/usr/local/cuda-8.0/include/cusolverRf.h" "$(@D)/cuda/include/cusolverRf.h" && cp "/usr/local/cuda-8.0/include/sm_60_atomic_functions.hpp" "$(@D)/cuda/include/sm_60_atomic_functions.hpp" && cp "/usr/local/cuda-8.0/include/cuviddec.h" "$(@D)/cuda/include/cuviddec.h" && cp 
"/usr/local/cuda-8.0/include/curand_discrete2.h" "$(@D)/cuda/include/curand_discrete2.h" && cp "/usr/local/cuda-8.0/include/device_functions.hpp" "$(@D)/cuda/include/device_functions.hpp" && cp "/usr/local/cuda-8.0/include/thrust/transform_scan.h" "$(@D)/cuda/include/thrust/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system_error.h" "$(@D)/cuda/include/thrust/system_error.h" && cp "/usr/local/cuda-8.0/include/thrust/device_malloc.h" "$(@D)/cuda/include/thrust/device_malloc.h" && cp "/usr/local/cuda-8.0/include/thrust/partition.h" "$(@D)/cuda/include/thrust/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/unique.h" "$(@D)/cuda/include/thrust/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/device_delete.h" "$(@D)/cuda/include/thrust/device_delete.h" && cp "/usr/local/cuda-8.0/include/thrust/execution_policy.h" "$(@D)/cuda/include/thrust/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/adjacent_difference.h" "$(@D)/cuda/include/thrust/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/sequence.h" "$(@D)/cuda/include/thrust/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/merge.h" "$(@D)/cuda/include/thrust/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/device_new.h" "$(@D)/cuda/include/thrust/device_new.h" && cp "/usr/local/cuda-8.0/include/thrust/transform_reduce.h" "$(@D)/cuda/include/thrust/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/device_vector.h" "$(@D)/cuda/include/thrust/device_vector.h" && cp "/usr/local/cuda-8.0/include/thrust/gather.h" "$(@D)/cuda/include/thrust/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/sort.h" "$(@D)/cuda/include/thrust/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/scan.h" "$(@D)/cuda/include/thrust/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/temporary_array.h" "$(@D)/cuda/include/thrust/detail/temporary_array.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/util/align.h" "$(@D)/cuda/include/thrust/detail/util/align.h" && cp 
"/usr/local/cuda-8.0/include/thrust/detail/util/blocking.h" "$(@D)/cuda/include/thrust/detail/util/blocking.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/transform.inl" "$(@D)/cuda/include/thrust/detail/transform.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_vector.inl" "$(@D)/cuda/include/thrust/detail/device_vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/binary_search.inl" "$(@D)/cuda/include/thrust/detail/binary_search.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/overlapped_copy.h" "$(@D)/cuda/include/thrust/detail/overlapped_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/vector_base.inl" "$(@D)/cuda/include/thrust/detail/vector_base.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_reference.inl" "$(@D)/cuda/include/thrust/detail/device_reference.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/actor.h" "$(@D)/cuda/include/thrust/detail/functional/actor.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/value.h" "$(@D)/cuda/include/thrust/detail/functional/value.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/logical_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/logical_operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/relational_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/relational_operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/assignment_operator.h" "$(@D)/cuda/include/thrust/detail/functional/operators/assignment_operator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/bitwise_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/bitwise_operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/operator_adaptors.h" 
"$(@D)/cuda/include/thrust/detail/functional/operators/operator_adaptors.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/arithmetic_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/arithmetic_operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/compound_assignment_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/compound_assignment_operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/argument.h" "$(@D)/cuda/include/thrust/detail/functional/argument.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/placeholder.h" "$(@D)/cuda/include/thrust/detail/functional/placeholder.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/actor.inl" "$(@D)/cuda/include/thrust/detail/functional/actor.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/composite.h" "$(@D)/cuda/include/thrust/detail/functional/composite.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/static_map.h" "$(@D)/cuda/include/thrust/detail/static_map.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/has_nested_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_nested_type.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/is_call_possible.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_call_possible.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/function_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/function_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/pointer_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/pointer_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/has_member_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_member_function.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" 
"$(@D)/cuda/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/minimum_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/minimum_type.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/has_trivial_assign.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_trivial_assign.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/is_metafunction_defined.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_metafunction_defined.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/iterator/is_output_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_output_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/result_of_adaptable_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/result_of_adaptable_function.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/reference.h" "$(@D)/cuda/include/thrust/detail/reference.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/inner_product.inl" "$(@D)/cuda/include/thrust/detail/inner_product.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/use_default.h" "$(@D)/cuda/include/thrust/detail/use_default.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/sequence.inl" "$(@D)/cuda/include/thrust/detail/sequence.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/sort.inl" "$(@D)/cuda/include/thrust/detail/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/equal.inl" "$(@D)/cuda/include/thrust/detail/equal.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/execution_policy.h" "$(@D)/cuda/include/thrust/detail/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/integer_traits.h" "$(@D)/cuda/include/thrust/detail/integer_traits.h" && cp 
"/usr/local/cuda-8.0/include/thrust/detail/type_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/reverse.inl" "$(@D)/cuda/include/thrust/detail/reverse.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/tabulate.inl" "$(@D)/cuda/include/thrust/detail/tabulate.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/unique.inl" "$(@D)/cuda/include/thrust/detail/unique.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/scatter.inl" "$(@D)/cuda/include/thrust/detail/scatter.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/set_operations.inl" "$(@D)/cuda/include/thrust/detail/set_operations.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_malloc.inl" "$(@D)/cuda/include/thrust/detail/device_malloc.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/copy_if.inl" "$(@D)/cuda/include/thrust/detail/copy_if.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/fill.inl" "$(@D)/cuda/include/thrust/detail/fill.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/temporary_array.inl" "$(@D)/cuda/include/thrust/detail/temporary_array.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/transform_scan.inl" "$(@D)/cuda/include/thrust/detail/transform_scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/minmax.h" "$(@D)/cuda/include/thrust/detail/minmax.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/swap.inl" "$(@D)/cuda/include/thrust/detail/swap.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/pointer.inl" "$(@D)/cuda/include/thrust/detail/pointer.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/transform_reduce.inl" "$(@D)/cuda/include/thrust/detail/transform_reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/config.h" "$(@D)/cuda/include/thrust/detail/config.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/distance.inl" "$(@D)/cuda/include/thrust/detail/distance.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/pair.inl" 
"$(@D)/cuda/include/thrust/detail/pair.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/temporary_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/tagged_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/destroy_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/destroy_range.h" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/no_throw_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/no_throw_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/default_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/fill_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/tagged_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/malloc_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/allocator_traits.h" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/copy_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/allocator_traits.inl" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/default_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.h" && cp 
"/usr/local/cuda-8.0/include/thrust/detail/allocator/copy_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/malloc_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/temporary_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/fill_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/detail/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/reduce.inl" "$(@D)/cuda/include/thrust/detail/reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_new.inl" "$(@D)/cuda/include/thrust/detail/device_new.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/pointer.h" "$(@D)/cuda/include/thrust/detail/pointer.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/for_each.inl" "$(@D)/cuda/include/thrust/detail/for_each.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/generate.inl" "$(@D)/cuda/include/thrust/detail/generate.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/dispatch/is_trivial_copy.h" "$(@D)/cuda/include/thrust/detail/dispatch/is_trivial_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/adjacent_difference.inl" "$(@D)/cuda/include/thrust/detail/adjacent_difference.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/tuple_meta_transform.h" "$(@D)/cuda/include/thrust/detail/tuple_meta_transform.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional.inl" "$(@D)/cuda/include/thrust/detail/functional.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/remove.inl" "$(@D)/cuda/include/thrust/detail/remove.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/tuple_transform.h" 
"$(@D)/cuda/include/thrust/detail/tuple_transform.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/merge.inl" "$(@D)/cuda/include/thrust/detail/merge.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/extrema.inl" "$(@D)/cuda/include/thrust/detail/extrema.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/trivial_sequence.h" "$(@D)/cuda/include/thrust/detail/trivial_sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/vector_base.h" "$(@D)/cuda/include/thrust/detail/vector_base.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/count.inl" "$(@D)/cuda/include/thrust/detail/count.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/function.h" "$(@D)/cuda/include/thrust/detail/function.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/swap_ranges.inl" "$(@D)/cuda/include/thrust/detail/swap_ranges.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_delete.inl" "$(@D)/cuda/include/thrust/detail/device_delete.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/static_assert.h" "$(@D)/cuda/include/thrust/detail/static_assert.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/logical.inl" "$(@D)/cuda/include/thrust/detail/logical.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/seq.h" "$(@D)/cuda/include/thrust/detail/seq.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/mpl/math.h" "$(@D)/cuda/include/thrust/detail/mpl/math.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/mismatch.inl" "$(@D)/cuda/include/thrust/detail/mismatch.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/internal_functional.h" "$(@D)/cuda/include/thrust/detail/internal_functional.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/get_iterator_value.h" "$(@D)/cuda/include/thrust/detail/get_iterator_value.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/copy.inl" "$(@D)/cuda/include/thrust/detail/copy.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/detail/copy.h" "$(@D)/cuda/include/thrust/detail/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/catrigf.h" "$(@D)/cuda/include/thrust/detail/complex/catrigf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/cpowf.h" "$(@D)/cuda/include/thrust/detail/complex/cpowf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/csqrtf.h" "$(@D)/cuda/include/thrust/detail/complex/csqrtf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/ccoshf.h" "$(@D)/cuda/include/thrust/detail/complex/ccoshf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/csinhf.h" "$(@D)/cuda/include/thrust/detail/complex/csinhf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/clogf.h" "$(@D)/cuda/include/thrust/detail/complex/clogf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/ccosh.h" "$(@D)/cuda/include/thrust/detail/complex/ccosh.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/arithmetic.h" "$(@D)/cuda/include/thrust/detail/complex/arithmetic.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/csqrt.h" "$(@D)/cuda/include/thrust/detail/complex/csqrt.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/cpow.h" "$(@D)/cuda/include/thrust/detail/complex/cpow.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/complex.inl" "$(@D)/cuda/include/thrust/detail/complex/complex.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/math_private.h" "$(@D)/cuda/include/thrust/detail/complex/math_private.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/c99math.h" "$(@D)/cuda/include/thrust/detail/complex/c99math.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/cproj.h" "$(@D)/cuda/include/thrust/detail/complex/cproj.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/catrig.h" "$(@D)/cuda/include/thrust/detail/complex/catrig.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/ctanhf.h" 
"$(@D)/cuda/include/thrust/detail/complex/ctanhf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/cexpf.h" "$(@D)/cuda/include/thrust/detail/complex/cexpf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/csinh.h" "$(@D)/cuda/include/thrust/detail/complex/csinh.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/stream.h" "$(@D)/cuda/include/thrust/detail/complex/stream.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/ctanh.h" "$(@D)/cuda/include/thrust/detail/complex/ctanh.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/cexp.h" "$(@D)/cuda/include/thrust/detail/complex/cexp.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/clog.h" "$(@D)/cuda/include/thrust/detail/complex/clog.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/range/head_flags.h" "$(@D)/cuda/include/thrust/detail/range/head_flags.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/range/tail_flags.h" "$(@D)/cuda/include/thrust/detail/range/tail_flags.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/execute_with_allocator.h" "$(@D)/cuda/include/thrust/detail/execute_with_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/integer_math.h" "$(@D)/cuda/include/thrust/detail/integer_math.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/swap.h" "$(@D)/cuda/include/thrust/detail/swap.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_fill.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/scan.inl" "$(@D)/cuda/include/thrust/detail/scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/gather.inl" "$(@D)/cuda/include/thrust/detail/gather.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/reference_forward_declaration.h" "$(@D)/cuda/include/thrust/detail/reference_forward_declaration.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/numeric_traits.h" "$(@D)/cuda/include/thrust/detail/numeric_traits.h" && cp 
"/usr/local/cuda-8.0/include/thrust/detail/reference.inl" "$(@D)/cuda/include/thrust/detail/reference.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/cstdint.h" "$(@D)/cuda/include/thrust/detail/cstdint.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_free.inl" "$(@D)/cuda/include/thrust/detail/device_free.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/copy_if.h" "$(@D)/cuda/include/thrust/detail/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/partition.inl" "$(@D)/cuda/include/thrust/detail/partition.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/find.inl" "$(@D)/cuda/include/thrust/detail/find.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/forceinline.h" "$(@D)/cuda/include/thrust/detail/config/forceinline.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/debug.h" "$(@D)/cuda/include/thrust/detail/config/debug.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/config.h" "$(@D)/cuda/include/thrust/detail/config/config.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/host_device.h" "$(@D)/cuda/include/thrust/detail/config/host_device.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/host_system.h" "$(@D)/cuda/include/thrust/detail/config/host_system.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/compiler.h" "$(@D)/cuda/include/thrust/detail/config/compiler.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/device_system.h" "$(@D)/cuda/include/thrust/detail/config/device_system.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/compiler_fence.h" "$(@D)/cuda/include/thrust/detail/config/compiler_fence.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/exec_check_disable.h" "$(@D)/cuda/include/thrust/detail/config/exec_check_disable.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/simple_defines.h" "$(@D)/cuda/include/thrust/detail/config/simple_defines.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/global_workarounds.h" 
"$(@D)/cuda/include/thrust/detail/config/global_workarounds.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/replace.inl" "$(@D)/cuda/include/thrust/detail/replace.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_ptr.inl" "$(@D)/cuda/include/thrust/detail/device_ptr.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/tuple.inl" "$(@D)/cuda/include/thrust/detail/tuple.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/detail/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/host_vector.inl" "$(@D)/cuda/include/thrust/detail/host_vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/raw_pointer_cast.h" "$(@D)/cuda/include/thrust/detail/raw_pointer_cast.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/advance.inl" "$(@D)/cuda/include/thrust/detail/advance.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/contiguous_storage.h" "$(@D)/cuda/include/thrust/detail/contiguous_storage.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/raw_reference_cast.h" "$(@D)/cuda/include/thrust/detail/raw_reference_cast.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/contiguous_storage.inl" "$(@D)/cuda/include/thrust/detail/contiguous_storage.inl" && cp "/usr/local/cuda-8.0/include/thrust/reverse.h" "$(@D)/cuda/include/thrust/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/device_malloc_allocator.h" "$(@D)/cuda/include/thrust/device_malloc_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/scatter.h" "$(@D)/cuda/include/thrust/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/pair.h" "$(@D)/cuda/include/thrust/pair.h" && cp "/usr/local/cuda-8.0/include/thrust/advance.h" "$(@D)/cuda/include/thrust/advance.h" && cp "/usr/local/cuda-8.0/include/thrust/find.h" "$(@D)/cuda/include/thrust/find.h" && cp "/usr/local/cuda-8.0/include/thrust/device_ptr.h" "$(@D)/cuda/include/thrust/device_ptr.h" && cp "/usr/local/cuda-8.0/include/thrust/generate.h" 
"$(@D)/cuda/include/thrust/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/uninitialized_fill.h" "$(@D)/cuda/include/thrust/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/system_error.h" "$(@D)/cuda/include/thrust/system/system_error.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/bad_alloc.h" "$(@D)/cuda/include/thrust/system/detail/bad_alloc.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/partition.h" "$(@D)/cuda/include/thrust/system/detail/adl/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/unique.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/adl/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/sequence.h" "$(@D)/cuda/include/thrust/system/detail/adl/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/merge.h" "$(@D)/cuda/include/thrust/system/detail/adl/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/gather.h" "$(@D)/cuda/include/thrust/system/detail/adl/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/sort.h" "$(@D)/cuda/include/thrust/system/detail/adl/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/adl/temporary_buffer.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/adl/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/reverse.h" "$(@D)/cuda/include/thrust/system/detail/adl/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/scatter.h" "$(@D)/cuda/include/thrust/system/detail/adl/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/find.h" "$(@D)/cuda/include/thrust/system/detail/adl/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/generate.h" "$(@D)/cuda/include/thrust/system/detail/adl/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/remove.h" "$(@D)/cuda/include/thrust/system/detail/adl/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/adl/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/for_each.h" "$(@D)/cuda/include/thrust/system/detail/adl/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/equal.h" "$(@D)/cuda/include/thrust/system/detail/adl/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/adl/swap_ranges.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/adl/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/adl/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/adl/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/adl/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/extrema.h" "$(@D)/cuda/include/thrust/system/detail/adl/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/count.h" "$(@D)/cuda/include/thrust/system/detail/adl/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/replace.h" "$(@D)/cuda/include/thrust/system/detail/adl/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/get_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/adl/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/logical.h" "$(@D)/cuda/include/thrust/system/detail/adl/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/adl/iter_swap.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/adl/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/transform.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/errno.h" "$(@D)/cuda/include/thrust/system/detail/errno.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/error_category.inl" "$(@D)/cuda/include/thrust/system/detail/error_category.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_primitive_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_primitive_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_merge_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/partition.h" "$(@D)/cuda/include/thrust/system/detail/sequential/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/unique.h" "$(@D)/cuda/include/thrust/system/detail/sequential/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/execution_policy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/sequential/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/sequence.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sequence.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/sequential/merge.h" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/gather.h" "$(@D)/cuda/include/thrust/system/detail/sequential/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/copy_backward.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_backward.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_radix_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/sequential/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/reverse.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/scatter.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/find.h" "$(@D)/cuda/include/thrust/system/detail/sequential/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_merge_sort.inl" 
"$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/merge.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/generate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/general_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/general_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/insertion_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/insertion_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/remove.h" "$(@D)/cuda/include/thrust/system/detail/sequential/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/for_each.h" "$(@D)/cuda/include/thrust/system/detail/sequential/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/equal.h" "$(@D)/cuda/include/thrust/system/detail/sequential/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_radix_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/copy.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/sequential/copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/sequential/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/sequential/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/sequential/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/sequential/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/extrema.h" "$(@D)/cuda/include/thrust/system/detail/sequential/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/count.h" "$(@D)/cuda/include/thrust/system/detail/sequential/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/trivial_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/trivial_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/replace.h" "$(@D)/cuda/include/thrust/system/detail/sequential/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/get_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/sequential/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/logical.h" 
"$(@D)/cuda/include/thrust/system/detail/sequential/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/sequential/iter_swap.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/sequential/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/transform.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/error_condition.inl" "$(@D)/cuda/include/thrust/system/detail/error_condition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/internal/decompose.h" "$(@D)/cuda/include/thrust/system/detail/internal/decompose.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/error_code.inl" "$(@D)/cuda/include/thrust/system/detail/error_code.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/memory.inl" "$(@D)/cuda/include/thrust/system/detail/generic/memory.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scan_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/generic/inner_product.inl" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/select_system.h" "$(@D)/cuda/include/thrust/system/detail/generic/select_system.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/sequence.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/sort.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/equal.inl" "$(@D)/cuda/include/thrust/system/detail/generic/equal.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/partition.h" "$(@D)/cuda/include/thrust/system/detail/generic/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/unique.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/tag.h" "$(@D)/cuda/include/thrust/system/detail/generic/tag.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/sequence.h" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/type_traits.h" "$(@D)/cuda/include/thrust/system/detail/generic/type_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/merge.h" "$(@D)/cuda/include/thrust/system/detail/generic/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reverse.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/generic/tabulate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/unique.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scatter.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/set_operations.inl" "$(@D)/cuda/include/thrust/system/detail/generic/set_operations.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/copy_if.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform_scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/gather.h" "$(@D)/cuda/include/thrust/system/detail/generic/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform_reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/sort.h" "$(@D)/cuda/include/thrust/system/detail/generic/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/distance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/distance.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/temporary_buffer.h" 
"$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reverse.h" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/temporary_buffer.inl" "$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scatter.h" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/generate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/generate.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/adjacent_difference.inl" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/remove.inl" "$(@D)/cuda/include/thrust/system/detail/generic/remove.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/advance.h" "$(@D)/cuda/include/thrust/system/detail/generic/advance.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/find.h" "$(@D)/cuda/include/thrust/system/detail/generic/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/merge.inl" "$(@D)/cuda/include/thrust/system/detail/generic/merge.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scalar/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scalar/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/generic/extrema.inl" "$(@D)/cuda/include/thrust/system/detail/generic/extrema.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/generate.h" "$(@D)/cuda/include/thrust/system/detail/generic/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/count.inl" "$(@D)/cuda/include/thrust/system/detail/generic/count.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/remove.h" "$(@D)/cuda/include/thrust/system/detail/generic/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/for_each.h" "$(@D)/cuda/include/thrust/system/detail/generic/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/distance.h" "$(@D)/cuda/include/thrust/system/detail/generic/distance.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/swap_ranges.inl" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/equal.h" "$(@D)/cuda/include/thrust/system/detail/generic/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/mismatch.inl" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/generic/copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/generic/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/gather.inl" "$(@D)/cuda/include/thrust/system/detail/generic/gather.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/extrema.h" "$(@D)/cuda/include/thrust/system/detail/generic/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/count.h" "$(@D)/cuda/include/thrust/system/detail/generic/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/replace.h" "$(@D)/cuda/include/thrust/system/detail/generic/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/generic/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/logical.h" "$(@D)/cuda/include/thrust/system/detail/generic/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/partition.inl" "$(@D)/cuda/include/thrust/system/detail/generic/partition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/memory.h" "$(@D)/cuda/include/thrust/system/detail/generic/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/find.inl" "$(@D)/cuda/include/thrust/system/detail/generic/find.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/replace.inl" "$(@D)/cuda/include/thrust/system/detail/generic/replace.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/advance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/advance.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/system_error.inl" "$(@D)/cuda/include/thrust/system/detail/system_error.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/vector.h" "$(@D)/cuda/include/thrust/system/omp/vector.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/omp/detail/memory.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce_intervals.inl" 
"$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/sort.inl" "$(@D)/cuda/include/thrust/system/omp/detail/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/partition.h" "$(@D)/cuda/include/thrust/system/omp/detail/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/unique.h" "$(@D)/cuda/include/thrust/system/omp/detail/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/detail/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/omp/detail/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/omp/detail/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/merge.h" "$(@D)/cuda/include/thrust/system/omp/detail/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/unique.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy_if.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/gather.h" "$(@D)/cuda/include/thrust/system/omp/detail/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/omp/detail/sort.h" "$(@D)/cuda/include/thrust/system/omp/detail/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/omp/detail/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/default_decomposition.h" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/omp/detail/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/omp/detail/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/default_decomposition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/remove.inl" "$(@D)/cuda/include/thrust/system/omp/detail/remove.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/omp/detail/vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/find.h" "$(@D)/cuda/include/thrust/system/omp/detail/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/generate.h" "$(@D)/cuda/include/thrust/system/omp/detail/generate.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/omp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/remove.h" "$(@D)/cuda/include/thrust/system/omp/detail/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/omp/detail/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/equal.h" "$(@D)/cuda/include/thrust/system/omp/detail/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/copy.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/omp/detail/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/omp/detail/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/omp/detail/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/omp/detail/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/omp/detail/extrema.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/omp/detail/count.h" "$(@D)/cuda/include/thrust/system/omp/detail/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/replace.h" "$(@D)/cuda/include/thrust/system/omp/detail/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/omp/detail/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/logical.h" "$(@D)/cuda/include/thrust/system/omp/detail/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/partition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/partition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/omp/detail/iter_swap.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/par.h" "$(@D)/cuda/include/thrust/system/omp/detail/par.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/omp/detail/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/transform.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/memory.h" "$(@D)/cuda/include/thrust/system/omp/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/vector.h" 
"$(@D)/cuda/include/thrust/system/tbb/vector.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/memory.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/memory.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/sort.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/partition.h" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/unique.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/tbb/detail/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/sequence.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/merge.h" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/unique.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_reduce.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/tbb/detail/gather.h" "$(@D)/cuda/include/thrust/system/tbb/detail/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/sort.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/tbb/detail/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reverse.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/scatter.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/remove.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/vector.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/find.h" "$(@D)/cuda/include/thrust/system/tbb/detail/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/merge.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/tbb/detail/generate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/remove.h" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/for_each.h" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/equal.h" "$(@D)/cuda/include/thrust/system/tbb/detail/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/copy.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/tbb/detail/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/tbb/detail/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/tbb/detail/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/tbb/detail/mismatch.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/tbb/detail/scan.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/extrema.h" "$(@D)/cuda/include/thrust/system/tbb/detail/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/count.h" "$(@D)/cuda/include/thrust/system/tbb/detail/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/replace.h" "$(@D)/cuda/include/thrust/system/tbb/detail/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/get_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/tbb/detail/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/logical.h" "$(@D)/cuda/include/thrust/system/tbb/detail/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/partition.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/tbb/detail/iter_swap.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/par.h" "$(@D)/cuda/include/thrust/system/tbb/detail/par.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_intervals.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/tbb/detail/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/transform.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/tbb/memory.h" "$(@D)/cuda/include/thrust/system/tbb/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/system/error_code.h" "$(@D)/cuda/include/thrust/system/error_code.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/vector.h" "$(@D)/cuda/include/thrust/system/cpp/vector.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/memory.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/partition.h" "$(@D)/cuda/include/thrust/system/cpp/detail/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/unique.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/cpp/detail/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/merge.h" "$(@D)/cuda/include/thrust/system/cpp/detail/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/gather.h" "$(@D)/cuda/include/thrust/system/cpp/detail/gather.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cpp/detail/sort.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cpp/detail/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/find.h" "$(@D)/cuda/include/thrust/system/cpp/detail/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/generate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/remove.h" "$(@D)/cuda/include/thrust/system/cpp/detail/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cpp/detail/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce_by_key.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cpp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/equal.h" "$(@D)/cuda/include/thrust/system/cpp/detail/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/cpp/detail/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cpp/detail/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/cpp/detail/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cpp/detail/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cpp/detail/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/count.h" "$(@D)/cuda/include/thrust/system/cpp/detail/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/replace.h" "$(@D)/cuda/include/thrust/system/cpp/detail/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cpp/detail/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/logical.h" "$(@D)/cuda/include/thrust/system/cpp/detail/logical.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cpp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cpp/detail/iter_swap.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/par.h" "$(@D)/cuda/include/thrust/system/cpp/detail/par.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cpp/detail/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/transform.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/memory.h" "$(@D)/cuda/include/thrust/system/cpp/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/vector.h" "$(@D)/cuda/include/thrust/system/cuda/vector.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/error.h" "$(@D)/cuda/include/thrust/system/cuda/error.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_device_to_device.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_device_to_device.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/memory.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_allocator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_allocator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_device.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_device.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_rle_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_rle_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_histogram_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_histogram_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_by_key_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_by_key_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_scan_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_scan_dispatch.cuh" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_select_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_select_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_radix_sort_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_radix_sort_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_select.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_select.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_histo.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_histo.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_scan.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_downsweep.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_downsweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_upsweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_upsweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_satomic.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_satomic.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_sort.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_gatomic.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_gatomic.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_select.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_select.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_scan_prefix_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_scan_prefix_operators.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce_by_key.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce_by_key.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_macro.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_macro.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_namespace.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_namespace.cuh" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_upsweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_upsweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_histogram_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_histogram_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_rle_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_rle_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_select_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_select_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_satomic_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_satomic_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_sort_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_sort_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_gatomic_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_gatomic_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_downsweep.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_downsweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_by_key_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_by_key_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_prefix_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_prefix_operators.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_type.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_type.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/host/spinlock.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/host/spinlock.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_ptx.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_ptx.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_debug.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_debug.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/cub.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/cub.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_load.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_shift.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_shift.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_store.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_arch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_arch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce_intervals.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_intervals.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_cross_system.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_cross_system.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk.h" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/sort.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/partition.h" "$(@D)/cuda/include/thrust/system/cuda/detail/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/unique.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cuda_launch_config.h" "$(@D)/cuda/include/thrust/system/cuda/detail/cuda_launch_config.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub.h" "$(@D)/cuda/include/thrust/system/cuda/detail/cub.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/cuda/detail/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sequence.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/merge.h" "$(@D)/cuda/include/thrust/system/cuda/detail/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/set_symmetric_difference.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/set_symmetric_difference.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_if.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/error.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/error.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/gather.h" "$(@D)/cuda/include/thrust/system/cuda/detail/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/synchronize.h" "$(@D)/cuda/include/thrust/system/cuda/detail/synchronize.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/temporary_indirect_permutation.h" "$(@D)/cuda/include/thrust/system/cuda/detail/temporary_indirect_permutation.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/extern_shared_ptr.h" "$(@D)/cuda/include/thrust/system/cuda/detail/extern_shared_ptr.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/set_operation.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/set_operation.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/balanced_path.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/balanced_path.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/virtualized_smem_closure.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/virtualized_smem_closure.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_primitive_sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_primitive_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/set_operation.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/set_operation.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_primitive_sort.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_primitive_sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_merge_sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_merge_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/launch_closure.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/launch_closure.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/merge.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/alignment.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/alignment.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_radix_sort.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_radix_sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_sort_each.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_sort_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/launch_calculator.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/launch_calculator.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_merge_sort.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_merge_sort.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/launch_closure.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/launch_closure.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_radix_sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_radix_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/uninitialized.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/uninitialized.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/cached_temporary_allocator.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/cached_temporary_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/launch_calculator.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/launch_calculator.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_sort_each.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_sort_each.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cuda/detail/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/default_decomposition.h" "$(@D)/cuda/include/thrust/system/cuda/detail/default_decomposition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scatter.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce_intervals.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_intervals.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/for_each.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/default_decomposition.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/default_decomposition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/adjacent_difference.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/adjacent_difference.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/throw_on_error.h" "$(@D)/cuda/include/thrust/system/cuda/detail/throw_on_error.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/find.h" "$(@D)/cuda/include/thrust/system/cuda/detail/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/terminate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/terminate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/merge.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/merge.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/trivial_copy.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/trivial_copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/generate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/execute_on_stream.h" "$(@D)/cuda/include/thrust/system/cuda/detail/execute_on_stream.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_fill.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/remove.h" "$(@D)/cuda/include/thrust/system/cuda/detail/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cuda/detail/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/decomposition.h" "$(@D)/cuda/include/thrust/system/cuda/detail/decomposition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/equal.h" "$(@D)/cuda/include/thrust/system/cuda/detail/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/runtime_introspection.h" "$(@D)/cuda/include/thrust/system/cuda/detail/runtime_introspection.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/cuda/detail/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cuda/detail/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/runtime_introspection.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/runtime_introspection.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/set_operations.h" 
"$(@D)/cuda/include/thrust/system/cuda/detail/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cuda/detail/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/scan.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/synchronize.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/synchronize.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cuda/detail/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/set_union.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/set_union.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/set_intersection.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/set_intersection.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/count.h" "$(@D)/cuda/include/thrust/system/cuda/detail/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/trivial_copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/trivial_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_device_to_device.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_device_to_device.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/replace.h" "$(@D)/cuda/include/thrust/system/cuda/detail/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/malloc.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/malloc.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/config.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/config.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/closure.hpp" 
"$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/closure.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/tail_flags.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/tail_flags.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/terminate.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/terminate.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/alignment.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/alignment.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/guarded_cuda_runtime_api.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/guarded_cuda_runtime_api.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/choose_sizes.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/choose_sizes.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/tuple_meta_transform.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/tuple_meta_transform.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_task.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_task.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/head_flags.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/head_flags.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/synchronize.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/synchronize.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/throw_on_error.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/throw_on_error.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/parameter_ptr.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/parameter_ptr.hpp" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launcher.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launcher.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/triple_chevron_launcher.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/triple_chevron_launcher.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launch_config.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launch_config.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/async.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/async.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/tuple_transform.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/tuple_transform.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/pointer_traits.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/pointer_traits.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/apply_from_tuple.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/apply_from_tuple.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/is_contiguous_iterator.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/is_contiguous_iterator.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/iterator.hpp" 
"$(@D)/cuda/include/thrust/system/cuda/detail/bulk/iterator.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/choose_sizes.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/choose_sizes.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/copy.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/copy.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/merge.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/merge.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/accumulate.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/accumulate.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/scan.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/scan.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/detail/stable_merge_sort.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/detail/stable_merge_sort.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/gather.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/gather.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/sort.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/sort.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/reduce.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/reduce.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/scatter.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/scatter.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/adjacent_difference.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/adjacent_difference.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/reduce_by_key.hpp" 
"$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/reduce_by_key.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/for_each.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/for_each.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/bulk.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/bulk.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/execution_policy.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/execution_policy.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/iterator/strided_iterator.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/iterator/strided_iterator.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/uninitialized.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/uninitialized.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/async.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/async.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/future.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/future.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/guarded_driver_types.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_driver_types.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cuda/detail/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/logical.h" "$(@D)/cuda/include/thrust/system/cuda/detail/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cuda/detail/iter_swap.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/merge.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/inclusive_scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/inclusive_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/merge.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/block/merge.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/merging_sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/merging_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/exclusive_scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/exclusive_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/odd_even_sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/odd_even_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/par.h" "$(@D)/cuda/include/thrust/system/cuda/detail/par.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_cross_system.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_cross_system.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_intervals.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cuda/detail/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/fill.h" "$(@D)/cuda/include/thrust/system/cuda/detail/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/set_difference.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/set_difference.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/transform.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/experimental/pinned_allocator.h" "$(@D)/cuda/include/thrust/system/cuda/experimental/pinned_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/memory.h" "$(@D)/cuda/include/thrust/system/cuda/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/remove.h" "$(@D)/cuda/include/thrust/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/tabulate.h" "$(@D)/cuda/include/thrust/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/for_each.h" "$(@D)/cuda/include/thrust/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/distance.h" "$(@D)/cuda/include/thrust/distance.h" && cp "/usr/local/cuda-8.0/include/thrust/reduce.h" "$(@D)/cuda/include/thrust/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/equal.h" "$(@D)/cuda/include/thrust/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/complex.h" "$(@D)/cuda/include/thrust/complex.h" && cp "/usr/local/cuda-8.0/include/thrust/device_allocator.h" "$(@D)/cuda/include/thrust/device_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/copy.h" "$(@D)/cuda/include/thrust/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/uninitialized_copy.h" "$(@D)/cuda/include/thrust/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/device_reference.h" "$(@D)/cuda/include/thrust/device_reference.h" && cp "/usr/local/cuda-8.0/include/thrust/binary_search.h" "$(@D)/cuda/include/thrust/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/set_operations.h" "$(@D)/cuda/include/thrust/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/swap.h" "$(@D)/cuda/include/thrust/swap.h" && cp "/usr/local/cuda-8.0/include/thrust/mismatch.h" "$(@D)/cuda/include/thrust/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/extrema.h" "$(@D)/cuda/include/thrust/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/count.h" 
"$(@D)/cuda/include/thrust/count.h" && cp "/usr/local/cuda-8.0/include/thrust/device_free.h" "$(@D)/cuda/include/thrust/device_free.h" && cp "/usr/local/cuda-8.0/include/thrust/random/discard_block_engine.h" "$(@D)/cuda/include/thrust/random/discard_block_engine.h" && cp "/usr/local/cuda-8.0/include/thrust/random/normal_distribution.h" "$(@D)/cuda/include/thrust/random/normal_distribution.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/subtract_with_carry_engine.inl" "$(@D)/cuda/include/thrust/random/detail/subtract_with_carry_engine.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/xor_combine_engine_max.h" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine_max.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/linear_congruential_engine_discard.h" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine_discard.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/uniform_int_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_int_distribution.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/discard_block_engine.inl" "$(@D)/cuda/include/thrust/random/detail/discard_block_engine.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/uniform_real_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_real_distribution.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/random_core_access.h" "$(@D)/cuda/include/thrust/random/detail/random_core_access.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/mod.h" "$(@D)/cuda/include/thrust/random/detail/mod.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/linear_feedback_shift_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/random/detail/linear_congruential_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/xor_combine_engine.inl" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/normal_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/normal_distribution.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/normal_distribution_base.h" "$(@D)/cuda/include/thrust/random/detail/normal_distribution_base.h" && cp "/usr/local/cuda-8.0/include/thrust/random/uniform_int_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_int_distribution.h" && cp "/usr/local/cuda-8.0/include/thrust/random/linear_feedback_shift_engine.h" "$(@D)/cuda/include/thrust/random/linear_feedback_shift_engine.h" && cp "/usr/local/cuda-8.0/include/thrust/random/xor_combine_engine.h" "$(@D)/cuda/include/thrust/random/xor_combine_engine.h" && cp "/usr/local/cuda-8.0/include/thrust/random/subtract_with_carry_engine.h" "$(@D)/cuda/include/thrust/random/subtract_with_carry_engine.h" && cp "/usr/local/cuda-8.0/include/thrust/random/linear_congruential_engine.h" "$(@D)/cuda/include/thrust/random/linear_congruential_engine.h" && cp "/usr/local/cuda-8.0/include/thrust/random/uniform_real_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_real_distribution.h" && cp "/usr/local/cuda-8.0/include/thrust/functional.h" "$(@D)/cuda/include/thrust/functional.h" && cp "/usr/local/cuda-8.0/include/thrust/replace.h" "$(@D)/cuda/include/thrust/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/device_new_allocator.h" "$(@D)/cuda/include/thrust/device_new_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/host_vector.h" "$(@D)/cuda/include/thrust/host_vector.h" && cp "/usr/local/cuda-8.0/include/thrust/version.h" "$(@D)/cuda/include/thrust/version.h" && cp "/usr/local/cuda-8.0/include/thrust/inner_product.h" 
"$(@D)/cuda/include/thrust/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/iterator_traits.h" "$(@D)/cuda/include/thrust/iterator/iterator_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/discard_iterator.h" "$(@D)/cuda/include/thrust/iterator/discard_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/retag.h" "$(@D)/cuda/include/thrust/iterator/retag.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/permutation_iterator.h" "$(@D)/cuda/include/thrust/iterator/permutation_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/transform_iterator.h" "$(@D)/cuda/include/thrust/iterator/transform_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/reverse_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator.inl" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/zip_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator.inl" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/counting_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/counting_iterator.inl" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/distance_from_result.h" "$(@D)/cuda/include/thrust/iterator/detail/distance_from_result.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/host_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/host_system_tag.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_traversal_tags.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traversal_tags.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/retag.h" "$(@D)/cuda/include/thrust/iterator/detail/retag.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/tagged_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/tagged_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_traits.inl" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traits.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/iterator/detail/minimum_category.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_category.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/discard_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/discard_iterator_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_category_to_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_traversal.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/zip_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/normal_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/normal_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/join_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/join_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/device_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/device_system_tag.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/universal_categories.h" "$(@D)/cuda/include/thrust/iterator/detail/universal_categories.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/reverse_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/minimum_system.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_system.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/tuple_of_iterator_references.h" "$(@D)/cuda/include/thrust/iterator/detail/tuple_of_iterator_references.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/is_iterator_category.h" "$(@D)/cuda/include/thrust/iterator/detail/is_iterator_category.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/permutation_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/permutation_iterator_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/any_assign.h" 
"$(@D)/cuda/include/thrust/iterator/detail/any_assign.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/any_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/any_system_tag.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/is_trivial_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/is_trivial_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_category_to_system.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_system.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_adaptor_base.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_adaptor_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/constant_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/constant_iterator_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/transform_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/transform_iterator.inl" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_facade_category.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_facade_category.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/constant_iterator.h" "$(@D)/cuda/include/thrust/iterator/constant_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/counting_iterator.h" "$(@D)/cuda/include/thrust/iterator/counting_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/iterator_adaptor.h" "$(@D)/cuda/include/thrust/iterator/iterator_adaptor.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/iterator_facade.h" "$(@D)/cuda/include/thrust/iterator/iterator_facade.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/iterator_categories.h" "$(@D)/cuda/include/thrust/iterator/iterator_categories.h" && cp 
"/usr/local/cuda-8.0/include/thrust/iterator/reverse_iterator.h" "$(@D)/cuda/include/thrust/iterator/reverse_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/zip_iterator.h" "$(@D)/cuda/include/thrust/iterator/zip_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/logical.h" "$(@D)/cuda/include/thrust/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/tuple.h" "$(@D)/cuda/include/thrust/tuple.h" && cp "/usr/local/cuda-8.0/include/thrust/memory.h" "$(@D)/cuda/include/thrust/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/random.h" "$(@D)/cuda/include/thrust/random.h" && cp "/usr/local/cuda-8.0/include/thrust/fill.h" "$(@D)/cuda/include/thrust/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/transform.h" "$(@D)/cuda/include/thrust/transform.h" && cp "/usr/local/cuda-8.0/include/texture_types.h" "$(@D)/cuda/include/texture_types.h" && cp "/usr/local/cuda-8.0/include/nppversion.h" "$(@D)/cuda/include/nppversion.h" && cp "/usr/local/cuda-8.0/include/cuda_texture_types.h" "$(@D)/cuda/include/cuda_texture_types.h" && cp "/usr/local/cuda-8.0/include/fatbinary.h" "$(@D)/cuda/include/fatbinary.h" && cp "/usr/local/cuda-8.0/include/cublasXt.h" "$(@D)/cuda/include/cublasXt.h" && cp "/usr/local/cuda-8.0/include/cuda_fp16.h" "$(@D)/cuda/include/cuda_fp16.h" && cp "/usr/local/cuda-8.0/include/vector_functions.h" "$(@D)/cuda/include/vector_functions.h" && cp "/usr/local/cuda-8.0/include/cusparse.h" "$(@D)/cuda/include/cusparse.h" && cp "/usr/local/cuda-8.0/include/nppi_filtering_functions.h" "$(@D)/cuda/include/nppi_filtering_functions.h" && cp "/usr/local/cuda-8.0/include/nppi_morphological_operations.h" "$(@D)/cuda/include/nppi_morphological_operations.h" && cp "/usr/local/cuda-8.0/include/sobol_direction_vectors.h" "$(@D)/cuda/include/sobol_direction_vectors.h" && cp "/usr/local/cuda-8.0/include/nvblas.h" "$(@D)/cuda/include/nvblas.h" && cp "/usr/local/cuda-8.0/include/curand_mtgp32dc_p_11213.h" "$(@D)/cuda/include/curand_mtgp32dc_p_11213.h" && cp 
"/usr/local/cuda-8.0/include/nvcuvid.h" "$(@D)/cuda/include/nvcuvid.h" && cp "/usr/local/cuda-8.0/include/cuda_runtime_api.h" "$(@D)/cuda/include/cuda_runtime_api.h" && cp "/usr/local/cuda-8.0/include/curand_mtgp32_kernel.h" "$(@D)/cuda/include/curand_mtgp32_kernel.h" && cp "/usr/local/cuda-8.0/include/cublas_v2.h" "$(@D)/cuda/include/cublas_v2.h" && cp "/usr/local/cuda-8.0/include/builtin_types.h" "$(@D)/cuda/include/builtin_types.h" && cp "/usr/local/cuda-8.0/include/nppi_geometry_transforms.h" "$(@D)/cuda/include/nppi_geometry_transforms.h" && cp "/usr/local/cuda-8.0/include/npps_support_functions.h" "$(@D)/cuda/include/npps_support_functions.h" && cp "/usr/local/cuda-8.0/include/cufftw.h" "$(@D)/cuda/include/cufftw.h" && cp "/usr/local/cuda-8.0/include/cuda_device_runtime_api.h" "$(@D)/cuda/include/cuda_device_runtime_api.h" && cp "/usr/local/cuda-8.0/include/sm_30_intrinsics.hpp" "$(@D)/cuda/include/sm_30_intrinsics.hpp" && cp "/usr/local/cuda-8.0/include/vector_types.h" "$(@D)/cuda/include/vector_types.h" && cp "/usr/local/cuda-8.0/include/sm_35_atomic_functions.h" "$(@D)/cuda/include/sm_35_atomic_functions.h" && cp "/usr/local/cuda-8.0/include/sm_20_intrinsics.h" "$(@D)/cuda/include/sm_20_intrinsics.h" && cp "/usr/local/cuda-8.0/include/driver_types.h" "$(@D)/cuda/include/driver_types.h" && cp "/usr/local/cuda-8.0/include/nvToolsExtCudaRt.h" "$(@D)/cuda/include/nvToolsExtCudaRt.h" && cp "/usr/local/cuda-8.0/include/curand_globals.h" "$(@D)/cuda/include/curand_globals.h" && cp "/usr/local/cuda-8.0/include/device_atomic_functions.h" "$(@D)/cuda/include/device_atomic_functions.h" && cp "/usr/local/cuda-8.0/include/surface_types.h" "$(@D)/cuda/include/surface_types.h" && cp "/usr/local/cuda-8.0/include/nvrtc.h" "$(@D)/cuda/include/nvrtc.h" && cp "/usr/local/cuda-8.0/include/nppdefs.h" "$(@D)/cuda/include/nppdefs.h" && cp "/usr/local/cuda-8.0/include/sm_60_atomic_functions.h" "$(@D)/cuda/include/sm_60_atomic_functions.h" && cp 
"/usr/local/cuda-8.0/include/driver_functions.h" "$(@D)/cuda/include/driver_functions.h" && cp "/usr/local/cuda-8.0/include/cusolver_common.h" "$(@D)/cuda/include/cusolver_common.h" && cp "/usr/local/cuda-8.0/include/cublas.h" "$(@D)/cuda/include/cublas.h" && cp "/usr/local/cuda-8.0/include/curand_lognormal.h" "$(@D)/cuda/include/curand_lognormal.h" && cp "/usr/local/cuda-8.0/include/device_atomic_functions.hpp" "$(@D)/cuda/include/device_atomic_functions.hpp" && cp "/usr/local/cuda-8.0/include/crt/device_runtime.h" "$(@D)/cuda/include/crt/device_runtime.h" && cp "/usr/local/cuda-8.0/include/crt/storage_class.h" "$(@D)/cuda/include/crt/storage_class.h" && cp "/usr/local/cuda-8.0/include/crt/func_macro.h" "$(@D)/cuda/include/crt/func_macro.h" && cp "/usr/local/cuda-8.0/include/crt/host_runtime.h" "$(@D)/cuda/include/crt/host_runtime.h" && cp "/usr/local/cuda-8.0/include/nppi_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/nppi_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-8.0/include/npps_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/npps_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-8.0/include/nppi_computer_vision.h" "$(@D)/cuda/include/nppi_computer_vision.h" && cp "/usr/local/cuda-8.0/include/surface_functions.hpp" "$(@D)/cuda/include/surface_functions.hpp" && cp "/usr/local/cuda-8.0/include/surface_functions.h" "$(@D)/cuda/include/surface_functions.h" && cp "/usr/local/cuda-8.0/include/curand_normal_static.h" "$(@D)/cuda/include/curand_normal_static.h" && cp "/usr/local/cuda-8.0/include/curand.h" "$(@D)/cuda/include/curand.h" && cp "/usr/local/cuda-8.0/include/math_functions_dbl_ptx3.h" "$(@D)/cuda/include/math_functions_dbl_ptx3.h" && cp "/usr/local/cuda-8.0/include/curand_philox4x32_x.h" "$(@D)/cuda/include/curand_philox4x32_x.h" && cp "/usr/local/cuda-8.0/include/nppi_threshold_and_compare_operations.h" "$(@D)/cuda/include/nppi_threshold_and_compare_operations.h" && cp "/usr/local/cuda-8.0/include/nvml.h" 
"$(@D)/cuda/include/nvml.h" && cp "/usr/local/cuda-8.0/include/npps.h" "$(@D)/cuda/include/npps.h" && cp "/usr/local/cuda-8.0/include/cuda_vdpau_interop.h" "$(@D)/cuda/include/cuda_vdpau_interop.h" && cp "/usr/local/cuda-8.0/include/sm_61_intrinsics.hpp" "$(@D)/cuda/include/sm_61_intrinsics.hpp" && cp "/usr/local/cuda-8.0/include/cublas_api.h" "$(@D)/cuda/include/cublas_api.h" && cp "/usr/local/cuda-8.0/include/nppi_color_conversion.h" "$(@D)/cuda/include/nppi_color_conversion.h" && cp "/usr/local/cuda-8.0/include/math_functions_dbl_ptx3.hpp" "$(@D)/cuda/include/math_functions_dbl_ptx3.hpp" && cp "/usr/local/cuda-8.0/include/nppcore.h" "$(@D)/cuda/include/nppcore.h" && cp "/usr/local/cuda-8.0/include/cudaGL.h" "$(@D)/cuda/include/cudaGL.h" && cp "/usr/local/cuda-8.0/include/fatBinaryCtl.h" "$(@D)/cuda/include/fatBinaryCtl.h" && cp "/usr/local/cuda-8.0/include/npps_statistics_functions.h" "$(@D)/cuda/include/npps_statistics_functions.h" && cp "/usr/local/cuda-8.0/include/cudaVDPAU.h" "$(@D)/cuda/include/cudaVDPAU.h" && cp "/usr/local/cuda-8.0/include/curand_poisson.h" "$(@D)/cuda/include/curand_poisson.h" && cp "/usr/local/cuda-8.0/include/cusolverDn.h" "$(@D)/cuda/include/cusolverDn.h" && cp "/usr/local/cuda-8.0/include/cuda_profiler_api.h" "$(@D)/cuda/include/cuda_profiler_api.h" && cp "/usr/local/cuda-8.0/include/sm_20_atomic_functions.h" "$(@D)/cuda/include/sm_20_atomic_functions.h" && cp "/usr/local/cuda-8.0/include/nvfunctional" "$(@D)/cuda/include/nvfunctional" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/include/CL/cl.h" "$(@D)/cuda/include/CL/cl.h" && cp "/usr/local/cuda-9.0/include/CL/cl.hpp" "$(@D)/cuda/include/CL/cl.hpp" && cp "/usr/local/cuda-9.0/include/CL/cl_egl.h" "$(@D)/cuda/include/CL/cl_egl.h" && cp "/usr/local/cuda-9.0/include/CL/cl_ext.h" 
"$(@D)/cuda/include/CL/cl_ext.h" && cp "/usr/local/cuda-9.0/include/CL/cl_gl.h" "$(@D)/cuda/include/CL/cl_gl.h" && cp "/usr/local/cuda-9.0/include/CL/cl_gl_ext.h" "$(@D)/cuda/include/CL/cl_gl_ext.h" && cp "/usr/local/cuda-9.0/include/CL/cl_platform.h" "$(@D)/cuda/include/CL/cl_platform.h" && cp "/usr/local/cuda-9.0/include/CL/opencl.h" "$(@D)/cuda/include/CL/opencl.h" && cp "/usr/local/cuda-9.0/include/builtin_types.h" "$(@D)/cuda/include/builtin_types.h" && cp "/usr/local/cuda-9.0/include/channel_descriptor.h" "$(@D)/cuda/include/channel_descriptor.h" && cp "/usr/local/cuda-9.0/include/common_functions.h" "$(@D)/cuda/include/common_functions.h" && cp "/usr/local/cuda-9.0/include/cooperative_groups.h" "$(@D)/cuda/include/cooperative_groups.h" && cp "/usr/local/cuda-9.0/include/cooperative_groups_helpers.h" "$(@D)/cuda/include/cooperative_groups_helpers.h" && cp "/usr/local/cuda-9.0/include/crt/common_functions.h" "$(@D)/cuda/include/crt/common_functions.h" && cp "/usr/local/cuda-9.0/include/crt/device_double_functions.h" "$(@D)/cuda/include/crt/device_double_functions.h" && cp "/usr/local/cuda-9.0/include/crt/device_double_functions.hpp" "$(@D)/cuda/include/crt/device_double_functions.hpp" && cp "/usr/local/cuda-9.0/include/crt/device_functions.h" "$(@D)/cuda/include/crt/device_functions.h" && cp "/usr/local/cuda-9.0/include/crt/device_functions.hpp" "$(@D)/cuda/include/crt/device_functions.hpp" && cp "/usr/local/cuda-9.0/include/crt/func_macro.h" "$(@D)/cuda/include/crt/func_macro.h" && cp "/usr/local/cuda-9.0/include/crt/host_config.h" "$(@D)/cuda/include/crt/host_config.h" && cp "/usr/local/cuda-9.0/include/crt/host_defines.h" "$(@D)/cuda/include/crt/host_defines.h" && cp "/usr/local/cuda-9.0/include/crt/host_runtime.h" "$(@D)/cuda/include/crt/host_runtime.h" && cp "/usr/local/cuda-9.0/include/crt/math_functions.h" "$(@D)/cuda/include/crt/math_functions.h" && cp "/usr/local/cuda-9.0/include/crt/math_functions.hpp" "$(@D)/cuda/include/crt/math_functions.hpp" && 
cp "/usr/local/cuda-9.0/include/crt/mma.h" "$(@D)/cuda/include/crt/mma.h" && cp "/usr/local/cuda-9.0/include/crt/mma.hpp" "$(@D)/cuda/include/crt/mma.hpp" && cp "/usr/local/cuda-9.0/include/crt/nvfunctional" "$(@D)/cuda/include/crt/nvfunctional" && cp "/usr/local/cuda-9.0/include/crt/sm_70_rt.h" "$(@D)/cuda/include/crt/sm_70_rt.h" && cp "/usr/local/cuda-9.0/include/crt/sm_70_rt.hpp" "$(@D)/cuda/include/crt/sm_70_rt.hpp" && cp "/usr/local/cuda-9.0/include/crt/storage_class.h" "$(@D)/cuda/include/crt/storage_class.h" && cp "/usr/local/cuda-9.0/include/cuComplex.h" "$(@D)/cuda/include/cuComplex.h" && cp "/usr/local/cuda-9.0/include/cublas.h" "$(@D)/cuda/include/cublas.h" && cp "/usr/local/cuda-9.0/include/cublasXt.h" "$(@D)/cuda/include/cublasXt.h" && cp "/usr/local/cuda-9.0/include/cublas_api.h" "$(@D)/cuda/include/cublas_api.h" && cp "/usr/local/cuda-9.0/include/cublas_v2.h" "$(@D)/cuda/include/cublas_v2.h" && cp "/usr/local/cuda-9.0/include/cuda.h" "$(@D)/cuda/include/cuda.h" && cp "/usr/local/cuda-9.0/include/cudaEGL.h" "$(@D)/cuda/include/cudaEGL.h" && cp "/usr/local/cuda-9.0/include/cudaGL.h" "$(@D)/cuda/include/cudaGL.h" && cp "/usr/local/cuda-9.0/include/cudaProfiler.h" "$(@D)/cuda/include/cudaProfiler.h" && cp "/usr/local/cuda-9.0/include/cudaVDPAU.h" "$(@D)/cuda/include/cudaVDPAU.h" && cp "/usr/local/cuda-9.0/include/cuda_device_runtime_api.h" "$(@D)/cuda/include/cuda_device_runtime_api.h" && cp "/usr/local/cuda-9.0/include/cuda_fp16.h" "$(@D)/cuda/include/cuda_fp16.h" && cp "/usr/local/cuda-9.0/include/cuda_fp16.hpp" "$(@D)/cuda/include/cuda_fp16.hpp" && cp "/usr/local/cuda-9.0/include/cuda_gl_interop.h" "$(@D)/cuda/include/cuda_gl_interop.h" && cp "/usr/local/cuda-9.0/include/cuda_occupancy.h" "$(@D)/cuda/include/cuda_occupancy.h" && cp "/usr/local/cuda-9.0/include/cuda_profiler_api.h" "$(@D)/cuda/include/cuda_profiler_api.h" && cp "/usr/local/cuda-9.0/include/cuda_runtime.h" "$(@D)/cuda/include/cuda_runtime.h" && cp 
"/usr/local/cuda-9.0/include/cuda_runtime_api.h" "$(@D)/cuda/include/cuda_runtime_api.h" && cp "/usr/local/cuda-9.0/include/cuda_surface_types.h" "$(@D)/cuda/include/cuda_surface_types.h" && cp "/usr/local/cuda-9.0/include/cuda_texture_types.h" "$(@D)/cuda/include/cuda_texture_types.h" && cp "/usr/local/cuda-9.0/include/cuda_vdpau_interop.h" "$(@D)/cuda/include/cuda_vdpau_interop.h" && cp "/usr/local/cuda-9.0/include/cudalibxt.h" "$(@D)/cuda/include/cudalibxt.h" && cp "/usr/local/cuda-9.0/include/cudnn.h" "$(@D)/cuda/include/cudnn.h" && cp "/usr/local/cuda-9.0/include/cufft.h" "$(@D)/cuda/include/cufft.h" && cp "/usr/local/cuda-9.0/include/cufftXt.h" "$(@D)/cuda/include/cufftXt.h" && cp "/usr/local/cuda-9.0/include/cufftw.h" "$(@D)/cuda/include/cufftw.h" && cp "/usr/local/cuda-9.0/include/curand.h" "$(@D)/cuda/include/curand.h" && cp "/usr/local/cuda-9.0/include/curand_discrete.h" "$(@D)/cuda/include/curand_discrete.h" && cp "/usr/local/cuda-9.0/include/curand_discrete2.h" "$(@D)/cuda/include/curand_discrete2.h" && cp "/usr/local/cuda-9.0/include/curand_globals.h" "$(@D)/cuda/include/curand_globals.h" && cp "/usr/local/cuda-9.0/include/curand_kernel.h" "$(@D)/cuda/include/curand_kernel.h" && cp "/usr/local/cuda-9.0/include/curand_lognormal.h" "$(@D)/cuda/include/curand_lognormal.h" && cp "/usr/local/cuda-9.0/include/curand_mrg32k3a.h" "$(@D)/cuda/include/curand_mrg32k3a.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32.h" "$(@D)/cuda/include/curand_mtgp32.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32_host.h" "$(@D)/cuda/include/curand_mtgp32_host.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32_kernel.h" "$(@D)/cuda/include/curand_mtgp32_kernel.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32dc_p_11213.h" "$(@D)/cuda/include/curand_mtgp32dc_p_11213.h" && cp "/usr/local/cuda-9.0/include/curand_normal.h" "$(@D)/cuda/include/curand_normal.h" && cp "/usr/local/cuda-9.0/include/curand_normal_static.h" "$(@D)/cuda/include/curand_normal_static.h" && cp 
"/usr/local/cuda-9.0/include/curand_philox4x32_x.h" "$(@D)/cuda/include/curand_philox4x32_x.h" && cp "/usr/local/cuda-9.0/include/curand_poisson.h" "$(@D)/cuda/include/curand_poisson.h" && cp "/usr/local/cuda-9.0/include/curand_precalc.h" "$(@D)/cuda/include/curand_precalc.h" && cp "/usr/local/cuda-9.0/include/curand_uniform.h" "$(@D)/cuda/include/curand_uniform.h" && cp "/usr/local/cuda-9.0/include/cusolverDn.h" "$(@D)/cuda/include/cusolverDn.h" && cp "/usr/local/cuda-9.0/include/cusolverRf.h" "$(@D)/cuda/include/cusolverRf.h" && cp "/usr/local/cuda-9.0/include/cusolverSp.h" "$(@D)/cuda/include/cusolverSp.h" && cp "/usr/local/cuda-9.0/include/cusolverSp_LOWLEVEL_PREVIEW.h" "$(@D)/cuda/include/cusolverSp_LOWLEVEL_PREVIEW.h" && cp "/usr/local/cuda-9.0/include/cusolver_common.h" "$(@D)/cuda/include/cusolver_common.h" && cp "/usr/local/cuda-9.0/include/cusparse.h" "$(@D)/cuda/include/cusparse.h" && cp "/usr/local/cuda-9.0/include/cusparse_v2.h" "$(@D)/cuda/include/cusparse_v2.h" && cp "/usr/local/cuda-9.0/include/device_atomic_functions.h" "$(@D)/cuda/include/device_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/device_atomic_functions.hpp" "$(@D)/cuda/include/device_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/device_double_functions.h" "$(@D)/cuda/include/device_double_functions.h" && cp "/usr/local/cuda-9.0/include/device_double_functions.hpp" "$(@D)/cuda/include/device_double_functions.hpp" && cp "/usr/local/cuda-9.0/include/device_functions.h" "$(@D)/cuda/include/device_functions.h" && cp "/usr/local/cuda-9.0/include/device_functions.hpp" "$(@D)/cuda/include/device_functions.hpp" && cp "/usr/local/cuda-9.0/include/device_functions_decls.h" "$(@D)/cuda/include/device_functions_decls.h" && cp "/usr/local/cuda-9.0/include/device_launch_parameters.h" "$(@D)/cuda/include/device_launch_parameters.h" && cp "/usr/local/cuda-9.0/include/device_types.h" "$(@D)/cuda/include/device_types.h" && cp "/usr/local/cuda-9.0/include/driver_functions.h" 
"$(@D)/cuda/include/driver_functions.h" && cp "/usr/local/cuda-9.0/include/driver_types.h" "$(@D)/cuda/include/driver_types.h" && cp "/usr/local/cuda-9.0/include/dynlink_cuda.h" "$(@D)/cuda/include/dynlink_cuda.h" && cp "/usr/local/cuda-9.0/include/dynlink_cuda_cuda.h" "$(@D)/cuda/include/dynlink_cuda_cuda.h" && cp "/usr/local/cuda-9.0/include/dynlink_cuviddec.h" "$(@D)/cuda/include/dynlink_cuviddec.h" && cp "/usr/local/cuda-9.0/include/dynlink_nvcuvid.h" "$(@D)/cuda/include/dynlink_nvcuvid.h" && cp "/usr/local/cuda-9.0/include/fatBinaryCtl.h" "$(@D)/cuda/include/fatBinaryCtl.h" && cp "/usr/local/cuda-9.0/include/fatbinary.h" "$(@D)/cuda/include/fatbinary.h" && cp "/usr/local/cuda-9.0/include/host_config.h" "$(@D)/cuda/include/host_config.h" && cp "/usr/local/cuda-9.0/include/host_defines.h" "$(@D)/cuda/include/host_defines.h" && cp "/usr/local/cuda-9.0/include/library_types.h" "$(@D)/cuda/include/library_types.h" && cp "/usr/local/cuda-9.0/include/math_constants.h" "$(@D)/cuda/include/math_constants.h" && cp "/usr/local/cuda-9.0/include/math_functions.h" "$(@D)/cuda/include/math_functions.h" && cp "/usr/local/cuda-9.0/include/math_functions.hpp" "$(@D)/cuda/include/math_functions.hpp" && cp "/usr/local/cuda-9.0/include/math_functions_dbl_ptx3.h" "$(@D)/cuda/include/math_functions_dbl_ptx3.h" && cp "/usr/local/cuda-9.0/include/math_functions_dbl_ptx3.hpp" "$(@D)/cuda/include/math_functions_dbl_ptx3.hpp" && cp "/usr/local/cuda-9.0/include/mma.h" "$(@D)/cuda/include/mma.h" && cp "/usr/local/cuda-9.0/include/npp.h" "$(@D)/cuda/include/npp.h" && cp "/usr/local/cuda-9.0/include/nppcore.h" "$(@D)/cuda/include/nppcore.h" && cp "/usr/local/cuda-9.0/include/nppdefs.h" "$(@D)/cuda/include/nppdefs.h" && cp "/usr/local/cuda-9.0/include/nppi.h" "$(@D)/cuda/include/nppi.h" && cp "/usr/local/cuda-9.0/include/nppi_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/nppi_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-9.0/include/nppi_color_conversion.h" 
"$(@D)/cuda/include/nppi_color_conversion.h" && cp "/usr/local/cuda-9.0/include/nppi_compression_functions.h" "$(@D)/cuda/include/nppi_compression_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_computer_vision.h" "$(@D)/cuda/include/nppi_computer_vision.h" && cp "/usr/local/cuda-9.0/include/nppi_data_exchange_and_initialization.h" "$(@D)/cuda/include/nppi_data_exchange_and_initialization.h" && cp "/usr/local/cuda-9.0/include/nppi_filtering_functions.h" "$(@D)/cuda/include/nppi_filtering_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_geometry_transforms.h" "$(@D)/cuda/include/nppi_geometry_transforms.h" && cp "/usr/local/cuda-9.0/include/nppi_linear_transforms.h" "$(@D)/cuda/include/nppi_linear_transforms.h" && cp "/usr/local/cuda-9.0/include/nppi_morphological_operations.h" "$(@D)/cuda/include/nppi_morphological_operations.h" && cp "/usr/local/cuda-9.0/include/nppi_statistics_functions.h" "$(@D)/cuda/include/nppi_statistics_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_support_functions.h" "$(@D)/cuda/include/nppi_support_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_threshold_and_compare_operations.h" "$(@D)/cuda/include/nppi_threshold_and_compare_operations.h" && cp "/usr/local/cuda-9.0/include/npps.h" "$(@D)/cuda/include/npps.h" && cp "/usr/local/cuda-9.0/include/npps_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/npps_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-9.0/include/npps_conversion_functions.h" "$(@D)/cuda/include/npps_conversion_functions.h" && cp "/usr/local/cuda-9.0/include/npps_filtering_functions.h" "$(@D)/cuda/include/npps_filtering_functions.h" && cp "/usr/local/cuda-9.0/include/npps_initialization.h" "$(@D)/cuda/include/npps_initialization.h" && cp "/usr/local/cuda-9.0/include/npps_statistics_functions.h" "$(@D)/cuda/include/npps_statistics_functions.h" && cp "/usr/local/cuda-9.0/include/npps_support_functions.h" "$(@D)/cuda/include/npps_support_functions.h" && cp 
"/usr/local/cuda-9.0/include/nppversion.h" "$(@D)/cuda/include/nppversion.h" && cp "/usr/local/cuda-9.0/include/nvToolsExt.h" "$(@D)/cuda/include/nvToolsExt.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtCuda.h" "$(@D)/cuda/include/nvToolsExtCuda.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtCudaRt.h" "$(@D)/cuda/include/nvToolsExtCudaRt.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtMeta.h" "$(@D)/cuda/include/nvToolsExtMeta.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtSync.h" "$(@D)/cuda/include/nvToolsExtSync.h" && cp "/usr/local/cuda-9.0/include/nvblas.h" "$(@D)/cuda/include/nvblas.h" && cp "/usr/local/cuda-9.0/include/nvfunctional" "$(@D)/cuda/include/nvfunctional" && cp "/usr/local/cuda-9.0/include/nvgraph.h" "$(@D)/cuda/include/nvgraph.h" && cp "/usr/local/cuda-9.0/include/nvml.h" "$(@D)/cuda/include/nvml.h" && cp "/usr/local/cuda-9.0/include/nvrtc.h" "$(@D)/cuda/include/nvrtc.h" && cp "/usr/local/cuda-9.0/include/sm_20_atomic_functions.h" "$(@D)/cuda/include/sm_20_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_20_atomic_functions.hpp" "$(@D)/cuda/include/sm_20_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/sm_20_intrinsics.h" "$(@D)/cuda/include/sm_20_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_20_intrinsics.hpp" "$(@D)/cuda/include/sm_20_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sm_30_intrinsics.h" "$(@D)/cuda/include/sm_30_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_30_intrinsics.hpp" "$(@D)/cuda/include/sm_30_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sm_32_atomic_functions.h" "$(@D)/cuda/include/sm_32_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_32_atomic_functions.hpp" "$(@D)/cuda/include/sm_32_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/sm_32_intrinsics.h" "$(@D)/cuda/include/sm_32_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_32_intrinsics.hpp" "$(@D)/cuda/include/sm_32_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sm_35_atomic_functions.h" 
"$(@D)/cuda/include/sm_35_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_35_intrinsics.h" "$(@D)/cuda/include/sm_35_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_60_atomic_functions.h" "$(@D)/cuda/include/sm_60_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_60_atomic_functions.hpp" "$(@D)/cuda/include/sm_60_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/sm_61_intrinsics.h" "$(@D)/cuda/include/sm_61_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_61_intrinsics.hpp" "$(@D)/cuda/include/sm_61_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sobol_direction_vectors.h" "$(@D)/cuda/include/sobol_direction_vectors.h" && cp "/usr/local/cuda-9.0/include/surface_functions.h" "$(@D)/cuda/include/surface_functions.h" && cp "/usr/local/cuda-9.0/include/surface_functions.hpp" "$(@D)/cuda/include/surface_functions.hpp" && cp "/usr/local/cuda-9.0/include/surface_indirect_functions.h" "$(@D)/cuda/include/surface_indirect_functions.h" && cp "/usr/local/cuda-9.0/include/surface_indirect_functions.hpp" "$(@D)/cuda/include/surface_indirect_functions.hpp" && cp "/usr/local/cuda-9.0/include/surface_types.h" "$(@D)/cuda/include/surface_types.h" && cp "/usr/local/cuda-9.0/include/texture_fetch_functions.h" "$(@D)/cuda/include/texture_fetch_functions.h" && cp "/usr/local/cuda-9.0/include/texture_fetch_functions.hpp" "$(@D)/cuda/include/texture_fetch_functions.hpp" && cp "/usr/local/cuda-9.0/include/texture_indirect_functions.h" "$(@D)/cuda/include/texture_indirect_functions.h" && cp "/usr/local/cuda-9.0/include/texture_indirect_functions.hpp" "$(@D)/cuda/include/texture_indirect_functions.hpp" && cp "/usr/local/cuda-9.0/include/texture_types.h" "$(@D)/cuda/include/texture_types.h" && cp "/usr/local/cuda-9.0/include/thrust/adjacent_difference.h" "$(@D)/cuda/include/thrust/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/advance.h" "$(@D)/cuda/include/thrust/advance.h" && cp "/usr/local/cuda-9.0/include/thrust/binary_search.h" 
"$(@D)/cuda/include/thrust/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/complex.h" "$(@D)/cuda/include/thrust/complex.h" && cp "/usr/local/cuda-9.0/include/thrust/copy.h" "$(@D)/cuda/include/thrust/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/count.h" "$(@D)/cuda/include/thrust/count.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/adjacent_difference.inl" "$(@D)/cuda/include/thrust/detail/adjacent_difference.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/advance.inl" "$(@D)/cuda/include/thrust/detail/advance.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/allocator_traits.h" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/allocator_traits.inl" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/copy_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/copy_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/default_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/default_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/destroy_range.h" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/destroy_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/fill_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/fill_construct_range.inl" 
"$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/malloc_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/malloc_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/no_throw_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/no_throw_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/tagged_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/tagged_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/temporary_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/temporary_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/binary_search.inl" "$(@D)/cuda/include/thrust/detail/binary_search.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/arithmetic.h" "$(@D)/cuda/include/thrust/detail/complex/arithmetic.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/c99math.h" "$(@D)/cuda/include/thrust/detail/complex/c99math.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/catrig.h" "$(@D)/cuda/include/thrust/detail/complex/catrig.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/catrigf.h" "$(@D)/cuda/include/thrust/detail/complex/catrigf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ccosh.h" "$(@D)/cuda/include/thrust/detail/complex/ccosh.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ccoshf.h" "$(@D)/cuda/include/thrust/detail/complex/ccoshf.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/complex/cexp.h" "$(@D)/cuda/include/thrust/detail/complex/cexp.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cexpf.h" "$(@D)/cuda/include/thrust/detail/complex/cexpf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/clog.h" "$(@D)/cuda/include/thrust/detail/complex/clog.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/clogf.h" "$(@D)/cuda/include/thrust/detail/complex/clogf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/complex.inl" "$(@D)/cuda/include/thrust/detail/complex/complex.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cpow.h" "$(@D)/cuda/include/thrust/detail/complex/cpow.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cpowf.h" "$(@D)/cuda/include/thrust/detail/complex/cpowf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cproj.h" "$(@D)/cuda/include/thrust/detail/complex/cproj.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csinh.h" "$(@D)/cuda/include/thrust/detail/complex/csinh.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csinhf.h" "$(@D)/cuda/include/thrust/detail/complex/csinhf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csqrt.h" "$(@D)/cuda/include/thrust/detail/complex/csqrt.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csqrtf.h" "$(@D)/cuda/include/thrust/detail/complex/csqrtf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ctanh.h" "$(@D)/cuda/include/thrust/detail/complex/ctanh.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ctanhf.h" "$(@D)/cuda/include/thrust/detail/complex/ctanhf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/math_private.h" "$(@D)/cuda/include/thrust/detail/complex/math_private.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/stream.h" "$(@D)/cuda/include/thrust/detail/complex/stream.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config.h" "$(@D)/cuda/include/thrust/detail/config.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/config/compiler.h" "$(@D)/cuda/include/thrust/detail/config/compiler.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/compiler_fence.h" "$(@D)/cuda/include/thrust/detail/config/compiler_fence.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/config.h" "$(@D)/cuda/include/thrust/detail/config/config.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/debug.h" "$(@D)/cuda/include/thrust/detail/config/debug.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/device_system.h" "$(@D)/cuda/include/thrust/detail/config/device_system.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/exec_check_disable.h" "$(@D)/cuda/include/thrust/detail/config/exec_check_disable.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/forceinline.h" "$(@D)/cuda/include/thrust/detail/config/forceinline.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/global_workarounds.h" "$(@D)/cuda/include/thrust/detail/config/global_workarounds.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/host_device.h" "$(@D)/cuda/include/thrust/detail/config/host_device.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/host_system.h" "$(@D)/cuda/include/thrust/detail/config/host_system.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/simple_defines.h" "$(@D)/cuda/include/thrust/detail/config/simple_defines.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/contiguous_storage.h" "$(@D)/cuda/include/thrust/detail/contiguous_storage.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/contiguous_storage.inl" "$(@D)/cuda/include/thrust/detail/contiguous_storage.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/copy.h" "$(@D)/cuda/include/thrust/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/copy.inl" "$(@D)/cuda/include/thrust/detail/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/copy_if.h" "$(@D)/cuda/include/thrust/detail/copy_if.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/copy_if.inl" "$(@D)/cuda/include/thrust/detail/copy_if.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/count.inl" "$(@D)/cuda/include/thrust/detail/count.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/cstdint.h" "$(@D)/cuda/include/thrust/detail/cstdint.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_delete.inl" "$(@D)/cuda/include/thrust/detail/device_delete.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_free.inl" "$(@D)/cuda/include/thrust/detail/device_free.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_malloc.inl" "$(@D)/cuda/include/thrust/detail/device_malloc.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_new.inl" "$(@D)/cuda/include/thrust/detail/device_new.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_ptr.inl" "$(@D)/cuda/include/thrust/detail/device_ptr.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_reference.inl" "$(@D)/cuda/include/thrust/detail/device_reference.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_vector.inl" "$(@D)/cuda/include/thrust/detail/device_vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/dispatch/is_trivial_copy.h" "$(@D)/cuda/include/thrust/detail/dispatch/is_trivial_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/distance.inl" "$(@D)/cuda/include/thrust/detail/distance.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/equal.inl" "$(@D)/cuda/include/thrust/detail/equal.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/execute_with_allocator.h" "$(@D)/cuda/include/thrust/detail/execute_with_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/execution_policy.h" "$(@D)/cuda/include/thrust/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/extrema.inl" "$(@D)/cuda/include/thrust/detail/extrema.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/fill.inl" "$(@D)/cuda/include/thrust/detail/fill.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/find.inl" "$(@D)/cuda/include/thrust/detail/find.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/for_each.inl" "$(@D)/cuda/include/thrust/detail/for_each.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/function.h" "$(@D)/cuda/include/thrust/detail/function.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional.inl" "$(@D)/cuda/include/thrust/detail/functional.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/actor.h" "$(@D)/cuda/include/thrust/detail/functional/actor.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/actor.inl" "$(@D)/cuda/include/thrust/detail/functional/actor.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/argument.h" "$(@D)/cuda/include/thrust/detail/functional/argument.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/composite.h" "$(@D)/cuda/include/thrust/detail/functional/composite.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/arithmetic_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/arithmetic_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/assignment_operator.h" "$(@D)/cuda/include/thrust/detail/functional/operators/assignment_operator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/bitwise_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/bitwise_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/compound_assignment_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/compound_assignment_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/logical_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/logical_operators.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/functional/operators/operator_adaptors.h" "$(@D)/cuda/include/thrust/detail/functional/operators/operator_adaptors.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/relational_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/relational_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/placeholder.h" "$(@D)/cuda/include/thrust/detail/functional/placeholder.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/value.h" "$(@D)/cuda/include/thrust/detail/functional/value.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/gather.inl" "$(@D)/cuda/include/thrust/detail/gather.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/generate.inl" "$(@D)/cuda/include/thrust/detail/generate.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/get_iterator_value.h" "$(@D)/cuda/include/thrust/detail/get_iterator_value.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/host_vector.inl" "$(@D)/cuda/include/thrust/detail/host_vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/inner_product.inl" "$(@D)/cuda/include/thrust/detail/inner_product.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/integer_math.h" "$(@D)/cuda/include/thrust/detail/integer_math.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/integer_traits.h" "$(@D)/cuda/include/thrust/detail/integer_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/internal_functional.h" "$(@D)/cuda/include/thrust/detail/internal_functional.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/logical.inl" "$(@D)/cuda/include/thrust/detail/logical.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/merge.inl" "$(@D)/cuda/include/thrust/detail/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/minmax.h" "$(@D)/cuda/include/thrust/detail/minmax.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/mismatch.inl" "$(@D)/cuda/include/thrust/detail/mismatch.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/mpl/math.h" "$(@D)/cuda/include/thrust/detail/mpl/math.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/numeric_traits.h" "$(@D)/cuda/include/thrust/detail/numeric_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/overlapped_copy.h" "$(@D)/cuda/include/thrust/detail/overlapped_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/pair.inl" "$(@D)/cuda/include/thrust/detail/pair.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/partition.inl" "$(@D)/cuda/include/thrust/detail/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/pointer.h" "$(@D)/cuda/include/thrust/detail/pointer.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/pointer.inl" "$(@D)/cuda/include/thrust/detail/pointer.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/range/head_flags.h" "$(@D)/cuda/include/thrust/detail/range/head_flags.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/range/tail_flags.h" "$(@D)/cuda/include/thrust/detail/range/tail_flags.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/raw_pointer_cast.h" "$(@D)/cuda/include/thrust/detail/raw_pointer_cast.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/raw_reference_cast.h" "$(@D)/cuda/include/thrust/detail/raw_reference_cast.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/reduce.inl" "$(@D)/cuda/include/thrust/detail/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/reference.h" "$(@D)/cuda/include/thrust/detail/reference.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/reference.inl" "$(@D)/cuda/include/thrust/detail/reference.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/reference_forward_declaration.h" "$(@D)/cuda/include/thrust/detail/reference_forward_declaration.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/remove.inl" "$(@D)/cuda/include/thrust/detail/remove.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/replace.inl" "$(@D)/cuda/include/thrust/detail/replace.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/reverse.inl" "$(@D)/cuda/include/thrust/detail/reverse.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/scan.inl" "$(@D)/cuda/include/thrust/detail/scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/scatter.inl" "$(@D)/cuda/include/thrust/detail/scatter.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/seq.h" "$(@D)/cuda/include/thrust/detail/seq.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/sequence.inl" "$(@D)/cuda/include/thrust/detail/sequence.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/set_operations.inl" "$(@D)/cuda/include/thrust/detail/set_operations.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/sort.inl" "$(@D)/cuda/include/thrust/detail/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/static_assert.h" "$(@D)/cuda/include/thrust/detail/static_assert.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/static_map.h" "$(@D)/cuda/include/thrust/detail/static_map.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/swap.h" "$(@D)/cuda/include/thrust/detail/swap.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/swap.inl" "$(@D)/cuda/include/thrust/detail/swap.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/swap_ranges.inl" "$(@D)/cuda/include/thrust/detail/swap_ranges.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/tabulate.inl" "$(@D)/cuda/include/thrust/detail/tabulate.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/temporary_array.h" "$(@D)/cuda/include/thrust/detail/temporary_array.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/temporary_array.inl" "$(@D)/cuda/include/thrust/detail/temporary_array.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/transform.inl" "$(@D)/cuda/include/thrust/detail/transform.inl" 
&& cp "/usr/local/cuda-9.0/include/thrust/detail/transform_reduce.inl" "$(@D)/cuda/include/thrust/detail/transform_reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/transform_scan.inl" "$(@D)/cuda/include/thrust/detail/transform_scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/trivial_sequence.h" "$(@D)/cuda/include/thrust/detail/trivial_sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/tuple.inl" "$(@D)/cuda/include/thrust/detail/tuple.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/tuple_meta_transform.h" "$(@D)/cuda/include/thrust/detail/tuple_meta_transform.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/tuple_transform.h" "$(@D)/cuda/include/thrust/detail/tuple_transform.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" "$(@D)/cuda/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/function_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/function_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/has_member_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_member_function.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/has_nested_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_nested_type.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/has_trivial_assign.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_trivial_assign.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/is_call_possible.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_call_possible.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/is_metafunction_defined.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_metafunction_defined.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/iterator/is_output_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_output_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/minimum_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/minimum_type.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/pointer_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/pointer_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/result_of_adaptable_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/result_of_adaptable_function.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_fill.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/unique.inl" "$(@D)/cuda/include/thrust/detail/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/use_default.h" "$(@D)/cuda/include/thrust/detail/use_default.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/util/align.h" "$(@D)/cuda/include/thrust/detail/util/align.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/util/blocking.h" "$(@D)/cuda/include/thrust/detail/util/blocking.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/vector_base.h" "$(@D)/cuda/include/thrust/detail/vector_base.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/vector_base.inl" "$(@D)/cuda/include/thrust/detail/vector_base.inl" && cp "/usr/local/cuda-9.0/include/thrust/device_allocator.h" "$(@D)/cuda/include/thrust/device_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/device_delete.h" "$(@D)/cuda/include/thrust/device_delete.h" && cp "/usr/local/cuda-9.0/include/thrust/device_free.h" 
"$(@D)/cuda/include/thrust/device_free.h" && cp "/usr/local/cuda-9.0/include/thrust/device_malloc.h" "$(@D)/cuda/include/thrust/device_malloc.h" && cp "/usr/local/cuda-9.0/include/thrust/device_malloc_allocator.h" "$(@D)/cuda/include/thrust/device_malloc_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/device_new.h" "$(@D)/cuda/include/thrust/device_new.h" && cp "/usr/local/cuda-9.0/include/thrust/device_new_allocator.h" "$(@D)/cuda/include/thrust/device_new_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/device_ptr.h" "$(@D)/cuda/include/thrust/device_ptr.h" && cp "/usr/local/cuda-9.0/include/thrust/device_reference.h" "$(@D)/cuda/include/thrust/device_reference.h" && cp "/usr/local/cuda-9.0/include/thrust/device_vector.h" "$(@D)/cuda/include/thrust/device_vector.h" && cp "/usr/local/cuda-9.0/include/thrust/distance.h" "$(@D)/cuda/include/thrust/distance.h" && cp "/usr/local/cuda-9.0/include/thrust/equal.h" "$(@D)/cuda/include/thrust/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/execution_policy.h" "$(@D)/cuda/include/thrust/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/extrema.h" "$(@D)/cuda/include/thrust/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/fill.h" "$(@D)/cuda/include/thrust/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/find.h" "$(@D)/cuda/include/thrust/find.h" && cp "/usr/local/cuda-9.0/include/thrust/for_each.h" "$(@D)/cuda/include/thrust/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/functional.h" "$(@D)/cuda/include/thrust/functional.h" && cp "/usr/local/cuda-9.0/include/thrust/gather.h" "$(@D)/cuda/include/thrust/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/generate.h" "$(@D)/cuda/include/thrust/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/host_vector.h" "$(@D)/cuda/include/thrust/host_vector.h" && cp "/usr/local/cuda-9.0/include/thrust/inner_product.h" "$(@D)/cuda/include/thrust/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/constant_iterator.h" 
"$(@D)/cuda/include/thrust/iterator/constant_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/counting_iterator.h" "$(@D)/cuda/include/thrust/iterator/counting_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/any_assign.h" "$(@D)/cuda/include/thrust/iterator/detail/any_assign.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/any_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/any_system_tag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/constant_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/constant_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/counting_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/counting_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/device_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/device_system_tag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/discard_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/discard_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/distance_from_result.h" "$(@D)/cuda/include/thrust/iterator/detail/distance_from_result.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/host_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/host_system_tag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/is_iterator_category.h" "$(@D)/cuda/include/thrust/iterator/detail/is_iterator_category.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/is_trivial_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/is_trivial_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_adaptor_base.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_adaptor_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_category_to_system.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_system.h" && cp 
"/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_category_to_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_traversal.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_facade_category.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_facade_category.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_traits.inl" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traits.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_traversal_tags.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traversal_tags.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/join_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/join_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/minimum_category.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_category.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/minimum_system.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_system.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/normal_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/normal_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/permutation_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/permutation_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/retag.h" "$(@D)/cuda/include/thrust/iterator/detail/retag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/reverse_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/reverse_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator_base.h" && cp 
"/usr/local/cuda-9.0/include/thrust/iterator/detail/tagged_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/tagged_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/transform_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/transform_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/transform_output_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/transform_output_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/tuple_of_iterator_references.h" "$(@D)/cuda/include/thrust/iterator/detail/tuple_of_iterator_references.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/universal_categories.h" "$(@D)/cuda/include/thrust/iterator/detail/universal_categories.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/zip_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/zip_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/discard_iterator.h" "$(@D)/cuda/include/thrust/iterator/discard_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_adaptor.h" "$(@D)/cuda/include/thrust/iterator/iterator_adaptor.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_categories.h" "$(@D)/cuda/include/thrust/iterator/iterator_categories.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_facade.h" "$(@D)/cuda/include/thrust/iterator/iterator_facade.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_traits.h" "$(@D)/cuda/include/thrust/iterator/iterator_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/permutation_iterator.h" "$(@D)/cuda/include/thrust/iterator/permutation_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/retag.h" "$(@D)/cuda/include/thrust/iterator/retag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/reverse_iterator.h" 
"$(@D)/cuda/include/thrust/iterator/reverse_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/transform_iterator.h" "$(@D)/cuda/include/thrust/iterator/transform_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/transform_output_iterator.h" "$(@D)/cuda/include/thrust/iterator/transform_output_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/zip_iterator.h" "$(@D)/cuda/include/thrust/iterator/zip_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/logical.h" "$(@D)/cuda/include/thrust/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/memory.h" "$(@D)/cuda/include/thrust/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/merge.h" "$(@D)/cuda/include/thrust/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/mismatch.h" "$(@D)/cuda/include/thrust/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/pair.h" "$(@D)/cuda/include/thrust/pair.h" && cp "/usr/local/cuda-9.0/include/thrust/partition.h" "$(@D)/cuda/include/thrust/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/random.h" "$(@D)/cuda/include/thrust/random.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/discard_block_engine.inl" "$(@D)/cuda/include/thrust/random/detail/discard_block_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_congruential_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_congruential_engine_discard.h" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine_discard.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_feedback_shift_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/mod.h" 
"$(@D)/cuda/include/thrust/random/detail/mod.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/normal_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/normal_distribution.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/normal_distribution_base.h" "$(@D)/cuda/include/thrust/random/detail/normal_distribution_base.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/random_core_access.h" "$(@D)/cuda/include/thrust/random/detail/random_core_access.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/subtract_with_carry_engine.inl" "$(@D)/cuda/include/thrust/random/detail/subtract_with_carry_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/uniform_int_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_int_distribution.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/uniform_real_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_real_distribution.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/xor_combine_engine.inl" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/xor_combine_engine_max.h" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine_max.h" && cp "/usr/local/cuda-9.0/include/thrust/random/discard_block_engine.h" "$(@D)/cuda/include/thrust/random/discard_block_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/random/linear_congruential_engine.h" "$(@D)/cuda/include/thrust/random/linear_congruential_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/random/linear_feedback_shift_engine.h" "$(@D)/cuda/include/thrust/random/linear_feedback_shift_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/random/normal_distribution.h" "$(@D)/cuda/include/thrust/random/normal_distribution.h" && cp "/usr/local/cuda-9.0/include/thrust/random/subtract_with_carry_engine.h" "$(@D)/cuda/include/thrust/random/subtract_with_carry_engine.h" && cp 
"/usr/local/cuda-9.0/include/thrust/random/uniform_int_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_int_distribution.h" && cp "/usr/local/cuda-9.0/include/thrust/random/uniform_real_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_real_distribution.h" && cp "/usr/local/cuda-9.0/include/thrust/random/xor_combine_engine.h" "$(@D)/cuda/include/thrust/random/xor_combine_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/reduce.h" "$(@D)/cuda/include/thrust/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/remove.h" "$(@D)/cuda/include/thrust/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/replace.h" "$(@D)/cuda/include/thrust/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/reverse.h" "$(@D)/cuda/include/thrust/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/scan.h" "$(@D)/cuda/include/thrust/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/scatter.h" "$(@D)/cuda/include/thrust/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/sequence.h" "$(@D)/cuda/include/thrust/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/set_operations.h" "$(@D)/cuda/include/thrust/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/sort.h" "$(@D)/cuda/include/thrust/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/swap.h" "$(@D)/cuda/include/thrust/swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/cpp/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cpp/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy_if.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cpp/detail/count.h" "$(@D)/cuda/include/thrust/system/cpp/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/equal.h" "$(@D)/cuda/include/thrust/system/cpp/detail/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cpp/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/find.h" "$(@D)/cuda/include/thrust/system/cpp/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cpp/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/gather.h" "$(@D)/cuda/include/thrust/system/cpp/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/generate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cpp/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cpp/detail/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/logical.h" "$(@D)/cuda/include/thrust/system/cpp/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cpp/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/memory.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cpp/detail/merge.h" "$(@D)/cuda/include/thrust/system/cpp/detail/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cpp/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/par.h" "$(@D)/cuda/include/thrust/system/cpp/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/partition.h" "$(@D)/cuda/include/thrust/system/cpp/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/remove.h" "$(@D)/cuda/include/thrust/system/cpp/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/replace.h" "$(@D)/cuda/include/thrust/system/cpp/detail/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/cpp/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/sort.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/swap_ranges.h" 
"$(@D)/cuda/include/thrust/system/cpp/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cpp/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/transform.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/unique.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/memory.h" "$(@D)/cuda/include/thrust/system/cpp/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/vector.h" "$(@D)/cuda/include/thrust/system/cpp/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/config.h" "$(@D)/cuda/include/thrust/system/cuda/config.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/adjacent_difference.h" 
"$(@D)/cuda/include/thrust/system/cuda/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cuda/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/agent_launcher.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/agent_launcher.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/alignment.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/alignment.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/triple_chevron_launch.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/triple_chevron_launch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/util.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/util.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/count.h" "$(@D)/cuda/include/thrust/system/cuda/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cross_system.h" "$(@D)/cuda/include/thrust/system/cuda/detail/cross_system.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_histogram.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_upsweep.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_upsweep.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_rle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_rle.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_segment_fixup.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_segment_fixup.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_select_if.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_select_if.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_csrt.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_csrt.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_orig.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_orig.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_row_based.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_row_based.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/single_pass_scan_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/single_pass_scan_operators.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_adjacent_difference.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_adjacent_difference.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_load.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_shuffle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_shuffle.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_store.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans2.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans2.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans3.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans3.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/cub.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/cub.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_segmented_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_segmented_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_select.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_select.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_spmv.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_spmv.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_histogram.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce_by_key.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce_by_key.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_rle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_rle.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_select_if.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_select_if.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_csrt.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_csrt.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_orig.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_orig.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_row_based.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_row_based.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/host/mutex.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/host/mutex.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/discard_output_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/discard_output_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_search.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_search.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_allocator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_allocator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_arch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_arch.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_debug.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_debug.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_device.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_device.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_macro.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_macro.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_namespace.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_namespace.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_ptx.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_ptx.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_type.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_type.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/equal.h" "$(@D)/cuda/include/thrust/system/cuda/detail/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/error.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/error.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cuda/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/fill.h" "$(@D)/cuda/include/thrust/system/cuda/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/find.h" "$(@D)/cuda/include/thrust/system/cuda/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cuda/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/gather.h" "$(@D)/cuda/include/thrust/system/cuda/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/generate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/guarded_driver_types.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_driver_types.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cuda/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/internal/copy_cross_system.h" "$(@D)/cuda/include/thrust/system/cuda/detail/internal/copy_cross_system.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/internal/copy_device_to_device.h" "$(@D)/cuda/include/thrust/system/cuda/detail/internal/copy_device_to_device.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cuda/detail/iter_swap.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/logical.h" "$(@D)/cuda/include/thrust/system/cuda/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cuda/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/memory_buffer.h" "$(@D)/cuda/include/thrust/system/cuda/detail/memory_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/merge.h" "$(@D)/cuda/include/thrust/system/cuda/detail/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cuda/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/par.h" "$(@D)/cuda/include/thrust/system/cuda/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/par_to_seq.h" "$(@D)/cuda/include/thrust/system/cuda/detail/par_to_seq.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/parallel_for.h" "$(@D)/cuda/include/thrust/system/cuda/detail/parallel_for.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/partition.h" "$(@D)/cuda/include/thrust/system/cuda/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/remove.h" "$(@D)/cuda/include/thrust/system/cuda/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/replace.h" "$(@D)/cuda/include/thrust/system/cuda/detail/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reverse.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/cuda/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/cuda/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cuda/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/terminate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/terminate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/transform.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/uninitialized_fill.h" 
"$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/unique.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/util.h" "$(@D)/cuda/include/thrust/system/cuda/detail/util.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/error.h" "$(@D)/cuda/include/thrust/system/cuda/error.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/experimental/pinned_allocator.h" "$(@D)/cuda/include/thrust/system/cuda/experimental/pinned_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/memory.h" "$(@D)/cuda/include/thrust/system/cuda/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/vector.h" "$(@D)/cuda/include/thrust/system/cuda/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/adl/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/adl/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/count.h" 
"$(@D)/cuda/include/thrust/system/detail/adl/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/equal.h" "$(@D)/cuda/include/thrust/system/detail/adl/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/extrema.h" "$(@D)/cuda/include/thrust/system/detail/adl/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/find.h" "$(@D)/cuda/include/thrust/system/detail/adl/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/for_each.h" "$(@D)/cuda/include/thrust/system/detail/adl/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/gather.h" "$(@D)/cuda/include/thrust/system/detail/adl/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/generate.h" "$(@D)/cuda/include/thrust/system/detail/adl/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/get_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/adl/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/adl/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/logical.h" "$(@D)/cuda/include/thrust/system/detail/adl/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/adl/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/merge.h" "$(@D)/cuda/include/thrust/system/detail/adl/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/adl/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/partition.h" "$(@D)/cuda/include/thrust/system/detail/adl/partition.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/adl/reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/remove.h" "$(@D)/cuda/include/thrust/system/detail/adl/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/replace.h" "$(@D)/cuda/include/thrust/system/detail/adl/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/reverse.h" "$(@D)/cuda/include/thrust/system/detail/adl/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/scatter.h" "$(@D)/cuda/include/thrust/system/detail/adl/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/sequence.h" "$(@D)/cuda/include/thrust/system/detail/adl/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/adl/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/sort.h" "$(@D)/cuda/include/thrust/system/detail/adl/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/adl/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/adl/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/adl/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/transform.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/adl/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/unique.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/bad_alloc.h" "$(@D)/cuda/include/thrust/system/detail/bad_alloc.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/errno.h" "$(@D)/cuda/include/thrust/system/detail/errno.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/error_category.inl" "$(@D)/cuda/include/thrust/system/detail/error_category.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/error_code.inl" "$(@D)/cuda/include/thrust/system/detail/error_code.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/error_condition.inl" "$(@D)/cuda/include/thrust/system/detail/error_condition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/adjacent_difference.inl" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/advance.h" "$(@D)/cuda/include/thrust/system/detail/generic/advance.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/advance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/advance.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy_if.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/count.h" "$(@D)/cuda/include/thrust/system/detail/generic/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/count.inl" "$(@D)/cuda/include/thrust/system/detail/generic/count.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/distance.h" "$(@D)/cuda/include/thrust/system/detail/generic/distance.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/distance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/distance.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/equal.h" "$(@D)/cuda/include/thrust/system/detail/generic/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/equal.inl" "$(@D)/cuda/include/thrust/system/detail/generic/equal.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/extrema.h" "$(@D)/cuda/include/thrust/system/detail/generic/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/extrema.inl" 
"$(@D)/cuda/include/thrust/system/detail/generic/extrema.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/find.h" "$(@D)/cuda/include/thrust/system/detail/generic/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/find.inl" "$(@D)/cuda/include/thrust/system/detail/generic/find.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/for_each.h" "$(@D)/cuda/include/thrust/system/detail/generic/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/gather.h" "$(@D)/cuda/include/thrust/system/detail/generic/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/gather.inl" "$(@D)/cuda/include/thrust/system/detail/generic/gather.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/generate.h" "$(@D)/cuda/include/thrust/system/detail/generic/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/generate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/generate.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/inner_product.inl" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/logical.h" "$(@D)/cuda/include/thrust/system/detail/generic/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/memory.h" "$(@D)/cuda/include/thrust/system/detail/generic/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/memory.inl" "$(@D)/cuda/include/thrust/system/detail/generic/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/merge.h" "$(@D)/cuda/include/thrust/system/detail/generic/merge.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/merge.inl" "$(@D)/cuda/include/thrust/system/detail/generic/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/mismatch.inl" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/partition.h" "$(@D)/cuda/include/thrust/system/detail/generic/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/partition.inl" "$(@D)/cuda/include/thrust/system/detail/generic/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/remove.h" "$(@D)/cuda/include/thrust/system/detail/generic/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/remove.inl" "$(@D)/cuda/include/thrust/system/detail/generic/remove.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/replace.h" "$(@D)/cuda/include/thrust/system/detail/generic/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/replace.inl" "$(@D)/cuda/include/thrust/system/detail/generic/replace.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reverse.h" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/reverse.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scalar/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scalar/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scatter.h" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scatter.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/select_system.h" "$(@D)/cuda/include/thrust/system/detail/generic/select_system.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sequence.h" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sequence.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/generic/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/set_operations.inl" 
"$(@D)/cuda/include/thrust/system/detail/generic/set_operations.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sort.h" "$(@D)/cuda/include/thrust/system/detail/generic/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sort.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/swap_ranges.inl" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/tabulate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/tag.h" "$(@D)/cuda/include/thrust/system/detail/generic/tag.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/temporary_buffer.inl" "$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/type_traits.h" "$(@D)/cuda/include/thrust/system/detail/generic/type_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/internal/decompose.h" "$(@D)/cuda/include/thrust/system/detail/internal/decompose.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/sequential/adjacent_difference.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/sequential/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/sequential/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy_backward.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_backward.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/count.h" "$(@D)/cuda/include/thrust/system/detail/sequential/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/equal.h" "$(@D)/cuda/include/thrust/system/detail/sequential/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/execution_policy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/extrema.h" "$(@D)/cuda/include/thrust/system/detail/sequential/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/find.h" "$(@D)/cuda/include/thrust/system/detail/sequential/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/for_each.h" "$(@D)/cuda/include/thrust/system/detail/sequential/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/gather.h" "$(@D)/cuda/include/thrust/system/detail/sequential/gather.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/sequential/general_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/general_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/generate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/get_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/sequential/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/insertion_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/insertion_sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/sequential/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/logical.h" "$(@D)/cuda/include/thrust/system/detail/sequential/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/sequential/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/merge.h" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/merge.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/sequential/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/partition.h" "$(@D)/cuda/include/thrust/system/detail/sequential/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/reduce_by_key.h" 
"$(@D)/cuda/include/thrust/system/detail/sequential/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/remove.h" "$(@D)/cuda/include/thrust/system/detail/sequential/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/replace.h" "$(@D)/cuda/include/thrust/system/detail/sequential/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/reverse.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/scatter.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/sequence.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/sequential/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_merge_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_merge_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_primitive_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_primitive_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_radix_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_radix_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/sequential/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/sequential/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/transform.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/trivial_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/trivial_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/unique.h" 
"$(@D)/cuda/include/thrust/system/detail/sequential/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/system_error.inl" "$(@D)/cuda/include/thrust/system/detail/system_error.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/error_code.h" "$(@D)/cuda/include/thrust/system/error_code.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/omp/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/omp/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy_if.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/count.h" "$(@D)/cuda/include/thrust/system/omp/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/default_decomposition.h" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/default_decomposition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/equal.h" "$(@D)/cuda/include/thrust/system/omp/detail/equal.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/omp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/omp/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/find.h" "$(@D)/cuda/include/thrust/system/omp/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/gather.h" "$(@D)/cuda/include/thrust/system/omp/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/generate.h" "$(@D)/cuda/include/thrust/system/omp/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/omp/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/omp/detail/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/logical.h" "$(@D)/cuda/include/thrust/system/omp/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/omp/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/omp/detail/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/merge.h" "$(@D)/cuda/include/thrust/system/omp/detail/merge.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/omp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/omp/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/par.h" "$(@D)/cuda/include/thrust/system/omp/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/partition.h" "$(@D)/cuda/include/thrust/system/omp/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/partition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_intervals.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/remove.h" "$(@D)/cuda/include/thrust/system/omp/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/remove.inl" "$(@D)/cuda/include/thrust/system/omp/detail/remove.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/replace.h" "$(@D)/cuda/include/thrust/system/omp/detail/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/omp/detail/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/omp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/omp/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/omp/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/omp/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/sort.h" "$(@D)/cuda/include/thrust/system/omp/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/sort.inl" "$(@D)/cuda/include/thrust/system/omp/detail/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/omp/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/omp/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/omp/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/transform.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique.h" 
"$(@D)/cuda/include/thrust/system/omp/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/omp/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/memory.h" "$(@D)/cuda/include/thrust/system/omp/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/vector.h" "$(@D)/cuda/include/thrust/system/omp/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system/system_error.h" "$(@D)/cuda/include/thrust/system/system_error.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/tbb/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/tbb/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/tbb/detail/count.h" "$(@D)/cuda/include/thrust/system/tbb/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/equal.h" "$(@D)/cuda/include/thrust/system/tbb/detail/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/extrema.h" "$(@D)/cuda/include/thrust/system/tbb/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/find.h" "$(@D)/cuda/include/thrust/system/tbb/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/for_each.h" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/gather.h" "$(@D)/cuda/include/thrust/system/tbb/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/generate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/get_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/tbb/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/tbb/detail/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/logical.h" "$(@D)/cuda/include/thrust/system/tbb/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/tbb/detail/malloc_and_free.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/tbb/detail/memory.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/merge.h" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/merge.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/tbb/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/par.h" "$(@D)/cuda/include/thrust/system/tbb/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/partition.h" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/partition.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_intervals.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/remove.h" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/remove.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/replace.h" "$(@D)/cuda/include/thrust/system/tbb/detail/replace.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reverse.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scan.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scatter.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/sequence.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/tbb/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/sort.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/sort.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/tbb/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/tbb/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/transform.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_scan.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/tbb/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/vector.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/memory.h" "$(@D)/cuda/include/thrust/system/tbb/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/vector.h" "$(@D)/cuda/include/thrust/system/tbb/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system_error.h" "$(@D)/cuda/include/thrust/system_error.h" && cp "/usr/local/cuda-9.0/include/thrust/tabulate.h" "$(@D)/cuda/include/thrust/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/transform.h" "$(@D)/cuda/include/thrust/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/transform_reduce.h" "$(@D)/cuda/include/thrust/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/transform_scan.h" "$(@D)/cuda/include/thrust/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/tuple.h" "$(@D)/cuda/include/thrust/tuple.h" && cp "/usr/local/cuda-9.0/include/thrust/uninitialized_copy.h" 
"$(@D)/cuda/include/thrust/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/uninitialized_fill.h" "$(@D)/cuda/include/thrust/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/unique.h" "$(@D)/cuda/include/thrust/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/version.h" "$(@D)/cuda/include/thrust/version.h" && cp "/usr/local/cuda-9.0/include/vector_functions.h" "$(@D)/cuda/include/vector_functions.h" && cp "/usr/local/cuda-9.0/include/vector_functions.hpp" "$(@D)/cuda/include/vector_functions.hpp" && cp "/usr/local/cuda-9.0/include/vector_types.h" "$(@D)/cuda/include/vector_types.h" """, ) @@ -1264,72 +1192,69 @@ genrule( name = "cuda-nvvm", outs = [ "cuda/nvvm/bin/cicc", - "cuda/nvvm/libdevice/libdevice.compute_50.10.bc", - "cuda/nvvm/libdevice/libdevice.compute_30.10.bc", - "cuda/nvvm/libdevice/libdevice.compute_20.10.bc", - "cuda/nvvm/libdevice/libdevice.compute_35.10.bc", - "cuda/nvvm/lib64/libnvvm.so.3", - "cuda/nvvm/lib64/libnvvm.so", - "cuda/nvvm/lib64/libnvvm.so.3.1.0", "cuda/nvvm/include/nvvm.h", - "cuda/nvvm/libnvvm-samples/ptxgen/README.txt", - "cuda/nvvm/libnvvm-samples/ptxgen/ptxgen.c", - "cuda/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt", + "cuda/nvvm/lib64/libnvvm.so", + "cuda/nvvm/lib64/libnvvm.so.3", + "cuda/nvvm/lib64/libnvvm.so.3.2.0", + "cuda/nvvm/libdevice/libdevice.10.bc", + "cuda/nvvm/libnvvm-samples/CMakeLists.txt", + "cuda/nvvm/libnvvm-samples/README.txt", "cuda/nvvm/libnvvm-samples/build.bat", - "cuda/nvvm/libnvvm-samples/cuda-c-linking/README.txt", - "cuda/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu", + "cuda/nvvm/libnvvm-samples/build.sh", + "cuda/nvvm/libnvvm-samples/common/include/DDSWriter.h", + "cuda/nvvm/libnvvm-samples/common/include/drvapi_error_string.h", "cuda/nvvm/libnvvm-samples/cuda-c-linking/CMakeLists.txt", + "cuda/nvvm/libnvvm-samples/cuda-c-linking/README.txt", "cuda/nvvm/libnvvm-samples/cuda-c-linking/cuda-c-linking.cpp", - "cuda/nvvm/libnvvm-samples/README.txt", - 
"cuda/nvvm/libnvvm-samples/simple/simple.c", - "cuda/nvvm/libnvvm-samples/simple/simple-gpu.ll", + "cuda/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu", + "cuda/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt", + "cuda/nvvm/libnvvm-samples/ptxgen/README.txt", + "cuda/nvvm/libnvvm-samples/ptxgen/ptxgen.c", + "cuda/nvvm/libnvvm-samples/simple/CMakeLists.txt", "cuda/nvvm/libnvvm-samples/simple/README.txt", + "cuda/nvvm/libnvvm-samples/simple/simple-gpu.ll", "cuda/nvvm/libnvvm-samples/simple/simple-gpu64.ll", - "cuda/nvvm/libnvvm-samples/simple/CMakeLists.txt", - "cuda/nvvm/libnvvm-samples/common/include/DDSWriter.h", - "cuda/nvvm/libnvvm-samples/common/include/drvapi_error_string.h", - "cuda/nvvm/libnvvm-samples/build.sh", - "cuda/nvvm/libnvvm-samples/CMakeLists.txt", + "cuda/nvvm/libnvvm-samples/simple/simple.c", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-8.0/nvvm/bin/cicc" "$(@D)/cuda/nvvm/bin/cicc" && cp "/usr/local/cuda-8.0/nvvm/libdevice/libdevice.compute_50.10.bc" "$(@D)/cuda/nvvm/libdevice/libdevice.compute_50.10.bc" && cp "/usr/local/cuda-8.0/nvvm/libdevice/libdevice.compute_30.10.bc" "$(@D)/cuda/nvvm/libdevice/libdevice.compute_30.10.bc" && cp "/usr/local/cuda-8.0/nvvm/libdevice/libdevice.compute_20.10.bc" "$(@D)/cuda/nvvm/libdevice/libdevice.compute_20.10.bc" && cp "/usr/local/cuda-8.0/nvvm/libdevice/libdevice.compute_35.10.bc" "$(@D)/cuda/nvvm/libdevice/libdevice.compute_35.10.bc" && cp "/usr/local/cuda-8.0/nvvm/lib64/libnvvm.so.3" "$(@D)/cuda/nvvm/lib64/libnvvm.so.3" && cp "/usr/local/cuda-8.0/nvvm/lib64/libnvvm.so" "$(@D)/cuda/nvvm/lib64/libnvvm.so" && cp "/usr/local/cuda-8.0/nvvm/lib64/libnvvm.so.3.1.0" "$(@D)/cuda/nvvm/lib64/libnvvm.so.3.1.0" && cp "/usr/local/cuda-8.0/nvvm/include/nvvm.h" "$(@D)/cuda/nvvm/include/nvvm.h" && cp 
"/usr/local/cuda-8.0/nvvm/libnvvm-samples/ptxgen/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/README.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/ptxgen/ptxgen.c" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/ptxgen.c" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/build.bat" "$(@D)/cuda/nvvm/libnvvm-samples/build.bat" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/cuda-c-linking/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/README.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/cuda-c-linking/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/CMakeLists.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/cuda-c-linking/cuda-c-linking.cpp" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/cuda-c-linking.cpp" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/README.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/simple/simple.c" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple.c" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/simple/simple-gpu.ll" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple-gpu.ll" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/simple/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/simple/README.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/simple/simple-gpu64.ll" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple-gpu64.ll" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/simple/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/simple/CMakeLists.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/common/include/DDSWriter.h" "$(@D)/cuda/nvvm/libnvvm-samples/common/include/DDSWriter.h" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/common/include/drvapi_error_string.h" 
"$(@D)/cuda/nvvm/libnvvm-samples/common/include/drvapi_error_string.h" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/build.sh" "$(@D)/cuda/nvvm/libnvvm-samples/build.sh" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/CMakeLists.txt" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/nvvm/bin/cicc" "$(@D)/cuda/nvvm/bin/cicc" && cp "/usr/local/cuda-9.0/nvvm/include/nvvm.h" "$(@D)/cuda/nvvm/include/nvvm.h" && cp "/usr/local/cuda-9.0/nvvm/lib64/libnvvm.so" "$(@D)/cuda/nvvm/lib64/libnvvm.so" && cp "/usr/local/cuda-9.0/nvvm/lib64/libnvvm.so.3" "$(@D)/cuda/nvvm/lib64/libnvvm.so.3" && cp "/usr/local/cuda-9.0/nvvm/lib64/libnvvm.so.3.2.0" "$(@D)/cuda/nvvm/lib64/libnvvm.so.3.2.0" && cp "/usr/local/cuda-9.0/nvvm/libdevice/libdevice.10.bc" "$(@D)/cuda/nvvm/libdevice/libdevice.10.bc" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/CMakeLists.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/README.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/build.bat" "$(@D)/cuda/nvvm/libnvvm-samples/build.bat" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/build.sh" "$(@D)/cuda/nvvm/libnvvm-samples/build.sh" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/common/include/DDSWriter.h" "$(@D)/cuda/nvvm/libnvvm-samples/common/include/DDSWriter.h" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/common/include/drvapi_error_string.h" "$(@D)/cuda/nvvm/libnvvm-samples/common/include/drvapi_error_string.h" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/cuda-c-linking/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/CMakeLists.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/cuda-c-linking/README.txt" 
"$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/README.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/cuda-c-linking/cuda-c-linking.cpp" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/cuda-c-linking.cpp" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/ptxgen/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/README.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/ptxgen/ptxgen.c" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/ptxgen.c" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/simple/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/simple/CMakeLists.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/simple/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/simple/README.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/simple/simple-gpu.ll" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple-gpu.ll" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/simple/simple-gpu64.ll" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple-gpu64.ll" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/simple/simple.c" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple.c" """, ) genrule( name = "cuda-extras", outs = [ - "cuda/extras/CUPTI/include/cupti_result.h", + "cuda/extras/CUPTI/include/GL/gl.h", + "cuda/extras/CUPTI/include/GL/glew.h", + "cuda/extras/CUPTI/include/GL/glext.h", + "cuda/extras/CUPTI/include/GL/glu.h", + "cuda/extras/CUPTI/include/GL/glut.h", + "cuda/extras/CUPTI/include/GL/glx.h", + "cuda/extras/CUPTI/include/GL/glxext.h", + "cuda/extras/CUPTI/include/GL/wglew.h", + "cuda/extras/CUPTI/include/GL/wglext.h", + "cuda/extras/CUPTI/include/cuda_stdint.h", + "cuda/extras/CUPTI/include/cupti.h", + "cuda/extras/CUPTI/include/cupti_activity.h", + "cuda/extras/CUPTI/include/cupti_callbacks.h", + 
"cuda/extras/CUPTI/include/cupti_driver_cbid.h", "cuda/extras/CUPTI/include/cupti_events.h", - "cuda/extras/CUPTI/include/openacc/cupti_openacc.h", + "cuda/extras/CUPTI/include/cupti_metrics.h", + "cuda/extras/CUPTI/include/cupti_nvtx_cbid.h", + "cuda/extras/CUPTI/include/cupti_result.h", + "cuda/extras/CUPTI/include/cupti_runtime_cbid.h", "cuda/extras/CUPTI/include/cupti_version.h", - "cuda/extras/CUPTI/include/generated_cuda_gl_interop_meta.h", + "cuda/extras/CUPTI/include/generated_cudaGL_meta.h", "cuda/extras/CUPTI/include/generated_cudaVDPAU_meta.h", - "cuda/extras/CUPTI/include/cupti_activity.h", - "cuda/extras/CUPTI/include/generated_cuda_runtime_api_meta.h", + "cuda/extras/CUPTI/include/generated_cuda_gl_interop_meta.h", "cuda/extras/CUPTI/include/generated_cuda_meta.h", - "cuda/extras/CUPTI/include/cupti_nvtx_cbid.h", - "cuda/extras/CUPTI/include/cuda_stdint.h", - "cuda/extras/CUPTI/include/generated_cudaGL_meta.h", + "cuda/extras/CUPTI/include/generated_cuda_runtime_api_meta.h", "cuda/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h", - "cuda/extras/CUPTI/include/cupti_metrics.h", - "cuda/extras/CUPTI/include/cupti_callbacks.h", - "cuda/extras/CUPTI/include/cupti_runtime_cbid.h", - "cuda/extras/CUPTI/include/cupti.h", - "cuda/extras/CUPTI/include/GL/glut.h", - "cuda/extras/CUPTI/include/GL/glu.h", - "cuda/extras/CUPTI/include/GL/glxext.h", - "cuda/extras/CUPTI/include/GL/wglext.h", - "cuda/extras/CUPTI/include/GL/glx.h", - "cuda/extras/CUPTI/include/GL/glext.h", - "cuda/extras/CUPTI/include/GL/wglew.h", - "cuda/extras/CUPTI/include/GL/gl.h", - "cuda/extras/CUPTI/include/GL/glew.h", - "cuda/extras/CUPTI/include/cupti_driver_cbid.h", "cuda/extras/CUPTI/include/generated_nvtx_meta.h", + "cuda/extras/CUPTI/include/openacc/cupti_openacc.h", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then 
rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_result.h" "$(@D)/cuda/extras/CUPTI/include/cupti_result.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_events.h" "$(@D)/cuda/extras/CUPTI/include/cupti_events.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/openacc/cupti_openacc.h" "$(@D)/cuda/extras/CUPTI/include/openacc/cupti_openacc.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_version.h" "$(@D)/cuda/extras/CUPTI/include/cupti_version.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cudaVDPAU_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cudaVDPAU_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_activity.h" "$(@D)/cuda/extras/CUPTI/include/cupti_activity.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cuda_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_nvtx_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_nvtx_cbid.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cuda_stdint.h" "$(@D)/cuda/extras/CUPTI/include/cuda_stdint.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cudaGL_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cudaGL_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_metrics.h" "$(@D)/cuda/extras/CUPTI/include/cupti_metrics.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_callbacks.h" "$(@D)/cuda/extras/CUPTI/include/cupti_callbacks.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_runtime_cbid.h" 
"$(@D)/cuda/extras/CUPTI/include/cupti_runtime_cbid.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti.h" "$(@D)/cuda/extras/CUPTI/include/cupti.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glut.h" "$(@D)/cuda/extras/CUPTI/include/GL/glut.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glu.h" "$(@D)/cuda/extras/CUPTI/include/GL/glu.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glxext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glxext.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/wglext.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglext.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glx.h" "$(@D)/cuda/extras/CUPTI/include/GL/glx.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glext.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/wglew.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglew.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/gl.h" "$(@D)/cuda/extras/CUPTI/include/GL/gl.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glew.h" "$(@D)/cuda/extras/CUPTI/include/GL/glew.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_driver_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_driver_cbid.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_nvtx_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_nvtx_meta.h" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/gl.h" "$(@D)/cuda/extras/CUPTI/include/GL/gl.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glew.h" "$(@D)/cuda/extras/CUPTI/include/GL/glew.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glext.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glu.h" "$(@D)/cuda/extras/CUPTI/include/GL/glu.h" && cp 
"/usr/local/cuda-9.0/extras/CUPTI/include/GL/glut.h" "$(@D)/cuda/extras/CUPTI/include/GL/glut.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glx.h" "$(@D)/cuda/extras/CUPTI/include/GL/glx.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glxext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glxext.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/wglew.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglew.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/wglext.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglext.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cuda_stdint.h" "$(@D)/cuda/extras/CUPTI/include/cuda_stdint.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti.h" "$(@D)/cuda/extras/CUPTI/include/cupti.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_activity.h" "$(@D)/cuda/extras/CUPTI/include/cupti_activity.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_callbacks.h" "$(@D)/cuda/extras/CUPTI/include/cupti_callbacks.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_driver_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_driver_cbid.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_events.h" "$(@D)/cuda/extras/CUPTI/include/cupti_events.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_metrics.h" "$(@D)/cuda/extras/CUPTI/include/cupti_metrics.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_nvtx_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_nvtx_cbid.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_result.h" "$(@D)/cuda/extras/CUPTI/include/cupti_result.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_runtime_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_runtime_cbid.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_version.h" "$(@D)/cuda/extras/CUPTI/include/cupti_version.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cudaGL_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cudaGL_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cudaVDPAU_meta.h" 
"$(@D)/cuda/extras/CUPTI/include/generated_cudaVDPAU_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_nvtx_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_nvtx_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/openacc/cupti_openacc.h" "$(@D)/cuda/extras/CUPTI/include/openacc/cupti_openacc.h" """, ) @@ -1337,26 +1262,21 @@ genrule( name = "cuda-lib", outs = [ "cuda/lib/libcuda.so", - "cuda/lib/libcudart.so.8.0", + "cuda/lib/libcudart.so.9.0", "cuda/lib/libcudart_static.a", - "cuda/lib/libcublas.so.8.0", - "cuda/lib/libcusolver.so.8.0", - "cuda/lib/libcurand.so.8.0", - "cuda/lib/libcufft.so.8.0", - "cuda/lib/libcudnn.so.6", - "cuda/lib/libcupti.so.8.0", + "cuda/lib/libcublas.so.9.0", + "cuda/lib/libcusolver.so.9.0", + "cuda/lib/libcurand.so.9.0", + "cuda/lib/libcufft.so.9.0", + "cuda/lib/libcudnn.so.7", + "cuda/lib/libcupti.so.9.0", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcudart.so.8.0.61" "$(@D)/cuda/lib/libcudart.so.8.0" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcudart_static.a" 
"$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcublas.so.8.0.88" "$(@D)/cuda/lib/libcublas.so.8.0" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcusolver.so.8.0.61" "$(@D)/cuda/lib/libcusolver.so.8.0" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcurand.so.8.0.61" "$(@D)/cuda/lib/libcurand.so.8.0" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcufft.so.8.0.61" "$(@D)/cuda/lib/libcufft.so.8.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.6.0.21" "$(@D)/cuda/lib/libcudnn.so.6" && cp "/usr/local/cuda-8.0/extras/CUPTI/lib64/libcupti.so.8.0.61" "$(@D)/cuda/lib/libcupti.so.8.0" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart.so.9.0.176" "$(@D)/cuda/lib/libcudart.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart_static.a" "$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcublas.so.9.0.282" "$(@D)/cuda/lib/libcublas.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcusolver.so.9.0.176" "$(@D)/cuda/lib/libcusolver.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcurand.so.9.0.176" "$(@D)/cuda/lib/libcurand.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcufft.so.9.0.176" "$(@D)/cuda/lib/libcufft.so.9.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.7.0.5" "$(@D)/cuda/lib/libcudnn.so.7" && cp "/usr/local/cuda-9.0/extras/CUPTI/lib64/libcupti.so.9.0.176" "$(@D)/cuda/lib/libcupti.so.9.0" """, ) -genrule( +filegroup( name = "cudnn-include", - outs = [ - "cuda/include/cudnn.h", - ], - cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; 
then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/include/cudnn.h" "$(@D)/cudnn.h" - """, + srcs = [], ) diff --git a/third_party/toolchains/gpus/py/BUILD b/third_party/toolchains/gpus/py/BUILD new file mode 100644 index 0000000000..2d5ace93ff --- /dev/null +++ b/third_party/toolchains/gpus/py/BUILD @@ -0,0 +1,171 @@ +# A build file to configure python remote repository used with Bazel remote +# execution service +# DO NOT EDIT: automatically generated BUILD file + +licenses(["restricted"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "python_headers", + hdrs = [":python_include"], + data = select({ + ":windows": [":python_import_lib"], + "//conditions:default": [], + }), + includes = ["python_include"], + linkopts = select({ + # TODO(pcloudy): Ideally, this should just go into deps after resolving + # https://github.com/bazelbuild/bazel/issues/3237, + ":windows": ["$(locations :python_import_lib)"], + "//conditions:default": [], + }), +) + +cc_library( + name = "numpy_headers", + hdrs = [":numpy_include"], + includes = ["numpy_include"], +) + +config_setting( + name = "windows", + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + +genrule( + name = "python_include", + outs = [ + "python_include/Python-ast.h", + "python_include/Python.h", + "python_include/abstract.h", + "python_include/asdl.h", + "python_include/ast.h", + "python_include/bitset.h", + "python_include/boolobject.h", + "python_include/bufferobject.h", + "python_include/bytearrayobject.h", + "python_include/bytes_methods.h", + "python_include/bytesobject.h", + "python_include/cStringIO.h", + "python_include/cellobject.h", + "python_include/ceval.h", + "python_include/classobject.h", + "python_include/cobject.h", + "python_include/code.h", + "python_include/codecs.h", + "python_include/compile.h", + "python_include/complexobject.h", + 
"python_include/datetime.h", + "python_include/descrobject.h", + "python_include/dictobject.h", + "python_include/dtoa.h", + "python_include/enumobject.h", + "python_include/errcode.h", + "python_include/eval.h", + "python_include/fileobject.h", + "python_include/floatobject.h", + "python_include/frameobject.h", + "python_include/funcobject.h", + "python_include/genobject.h", + "python_include/graminit.h", + "python_include/grammar.h", + "python_include/import.h", + "python_include/intobject.h", + "python_include/intrcheck.h", + "python_include/iterobject.h", + "python_include/listobject.h", + "python_include/longintrepr.h", + "python_include/longobject.h", + "python_include/marshal.h", + "python_include/memoryobject.h", + "python_include/metagrammar.h", + "python_include/methodobject.h", + "python_include/modsupport.h", + "python_include/moduleobject.h", + "python_include/node.h", + "python_include/object.h", + "python_include/objimpl.h", + "python_include/opcode.h", + "python_include/osdefs.h", + "python_include/parsetok.h", + "python_include/patchlevel.h", + "python_include/pgen.h", + "python_include/pgenheaders.h", + "python_include/py_curses.h", + "python_include/pyarena.h", + "python_include/pycapsule.h", + "python_include/pyconfig.h", + "python_include/pyctype.h", + "python_include/pydebug.h", + "python_include/pyerrors.h", + "python_include/pyexpat.h", + "python_include/pyfpe.h", + "python_include/pygetopt.h", + "python_include/pymacconfig.h", + "python_include/pymactoolbox.h", + "python_include/pymath.h", + "python_include/pymem.h", + "python_include/pyport.h", + "python_include/pystate.h", + "python_include/pystrcmp.h", + "python_include/pystrtod.h", + "python_include/pythonrun.h", + "python_include/pythread.h", + "python_include/rangeobject.h", + "python_include/setobject.h", + "python_include/sliceobject.h", + "python_include/stringobject.h", + "python_include/structmember.h", + "python_include/structseq.h", + "python_include/symtable.h", + 
"python_include/sysmodule.h", + "python_include/timefuncs.h", + "python_include/token.h", + "python_include/traceback.h", + "python_include/tupleobject.h", + "python_include/ucnhash.h", + "python_include/unicodeobject.h", + "python_include/warnings.h", + "python_include/weakrefobject.h", + ], + cmd = """ +cp "/usr/include/python2.7/Python-ast.h" "$(@D)/python_include/Python-ast.h" && cp "/usr/include/python2.7/Python.h" "$(@D)/python_include/Python.h" && cp "/usr/include/python2.7/abstract.h" "$(@D)/python_include/abstract.h" && cp "/usr/include/python2.7/asdl.h" "$(@D)/python_include/asdl.h" && cp "/usr/include/python2.7/ast.h" "$(@D)/python_include/ast.h" && cp "/usr/include/python2.7/bitset.h" "$(@D)/python_include/bitset.h" && cp "/usr/include/python2.7/boolobject.h" "$(@D)/python_include/boolobject.h" && cp "/usr/include/python2.7/bufferobject.h" "$(@D)/python_include/bufferobject.h" && cp "/usr/include/python2.7/bytearrayobject.h" "$(@D)/python_include/bytearrayobject.h" && cp "/usr/include/python2.7/bytes_methods.h" "$(@D)/python_include/bytes_methods.h" && cp "/usr/include/python2.7/bytesobject.h" "$(@D)/python_include/bytesobject.h" && cp "/usr/include/python2.7/cStringIO.h" "$(@D)/python_include/cStringIO.h" && cp "/usr/include/python2.7/cellobject.h" "$(@D)/python_include/cellobject.h" && cp "/usr/include/python2.7/ceval.h" "$(@D)/python_include/ceval.h" && cp "/usr/include/python2.7/classobject.h" "$(@D)/python_include/classobject.h" && cp "/usr/include/python2.7/cobject.h" "$(@D)/python_include/cobject.h" && cp "/usr/include/python2.7/code.h" "$(@D)/python_include/code.h" && cp "/usr/include/python2.7/codecs.h" "$(@D)/python_include/codecs.h" && cp "/usr/include/python2.7/compile.h" "$(@D)/python_include/compile.h" && cp "/usr/include/python2.7/complexobject.h" "$(@D)/python_include/complexobject.h" && cp "/usr/include/python2.7/datetime.h" "$(@D)/python_include/datetime.h" && cp "/usr/include/python2.7/descrobject.h" 
"$(@D)/python_include/descrobject.h" && cp "/usr/include/python2.7/dictobject.h" "$(@D)/python_include/dictobject.h" && cp "/usr/include/python2.7/dtoa.h" "$(@D)/python_include/dtoa.h" && cp "/usr/include/python2.7/enumobject.h" "$(@D)/python_include/enumobject.h" && cp "/usr/include/python2.7/errcode.h" "$(@D)/python_include/errcode.h" && cp "/usr/include/python2.7/eval.h" "$(@D)/python_include/eval.h" && cp "/usr/include/python2.7/fileobject.h" "$(@D)/python_include/fileobject.h" && cp "/usr/include/python2.7/floatobject.h" "$(@D)/python_include/floatobject.h" && cp "/usr/include/python2.7/frameobject.h" "$(@D)/python_include/frameobject.h" && cp "/usr/include/python2.7/funcobject.h" "$(@D)/python_include/funcobject.h" && cp "/usr/include/python2.7/genobject.h" "$(@D)/python_include/genobject.h" && cp "/usr/include/python2.7/graminit.h" "$(@D)/python_include/graminit.h" && cp "/usr/include/python2.7/grammar.h" "$(@D)/python_include/grammar.h" && cp "/usr/include/python2.7/import.h" "$(@D)/python_include/import.h" && cp "/usr/include/python2.7/intobject.h" "$(@D)/python_include/intobject.h" && cp "/usr/include/python2.7/intrcheck.h" "$(@D)/python_include/intrcheck.h" && cp "/usr/include/python2.7/iterobject.h" "$(@D)/python_include/iterobject.h" && cp "/usr/include/python2.7/listobject.h" "$(@D)/python_include/listobject.h" && cp "/usr/include/python2.7/longintrepr.h" "$(@D)/python_include/longintrepr.h" && cp "/usr/include/python2.7/longobject.h" "$(@D)/python_include/longobject.h" && cp "/usr/include/python2.7/marshal.h" "$(@D)/python_include/marshal.h" && cp "/usr/include/python2.7/memoryobject.h" "$(@D)/python_include/memoryobject.h" && cp "/usr/include/python2.7/metagrammar.h" "$(@D)/python_include/metagrammar.h" && cp "/usr/include/python2.7/methodobject.h" "$(@D)/python_include/methodobject.h" && cp "/usr/include/python2.7/modsupport.h" "$(@D)/python_include/modsupport.h" && cp "/usr/include/python2.7/moduleobject.h" "$(@D)/python_include/moduleobject.h" && 
cp "/usr/include/python2.7/node.h" "$(@D)/python_include/node.h" && cp "/usr/include/python2.7/object.h" "$(@D)/python_include/object.h" && cp "/usr/include/python2.7/objimpl.h" "$(@D)/python_include/objimpl.h" && cp "/usr/include/python2.7/opcode.h" "$(@D)/python_include/opcode.h" && cp "/usr/include/python2.7/osdefs.h" "$(@D)/python_include/osdefs.h" && cp "/usr/include/python2.7/parsetok.h" "$(@D)/python_include/parsetok.h" && cp "/usr/include/python2.7/patchlevel.h" "$(@D)/python_include/patchlevel.h" && cp "/usr/include/python2.7/pgen.h" "$(@D)/python_include/pgen.h" && cp "/usr/include/python2.7/pgenheaders.h" "$(@D)/python_include/pgenheaders.h" && cp "/usr/include/python2.7/py_curses.h" "$(@D)/python_include/py_curses.h" && cp "/usr/include/python2.7/pyarena.h" "$(@D)/python_include/pyarena.h" && cp "/usr/include/python2.7/pycapsule.h" "$(@D)/python_include/pycapsule.h" && cp "/usr/include/python2.7/pyconfig.h" "$(@D)/python_include/pyconfig.h" && cp "/usr/include/python2.7/pyctype.h" "$(@D)/python_include/pyctype.h" && cp "/usr/include/python2.7/pydebug.h" "$(@D)/python_include/pydebug.h" && cp "/usr/include/python2.7/pyerrors.h" "$(@D)/python_include/pyerrors.h" && cp "/usr/include/python2.7/pyexpat.h" "$(@D)/python_include/pyexpat.h" && cp "/usr/include/python2.7/pyfpe.h" "$(@D)/python_include/pyfpe.h" && cp "/usr/include/python2.7/pygetopt.h" "$(@D)/python_include/pygetopt.h" && cp "/usr/include/python2.7/pymacconfig.h" "$(@D)/python_include/pymacconfig.h" && cp "/usr/include/python2.7/pymactoolbox.h" "$(@D)/python_include/pymactoolbox.h" && cp "/usr/include/python2.7/pymath.h" "$(@D)/python_include/pymath.h" && cp "/usr/include/python2.7/pymem.h" "$(@D)/python_include/pymem.h" && cp "/usr/include/python2.7/pyport.h" "$(@D)/python_include/pyport.h" && cp "/usr/include/python2.7/pystate.h" "$(@D)/python_include/pystate.h" && cp "/usr/include/python2.7/pystrcmp.h" "$(@D)/python_include/pystrcmp.h" && cp "/usr/include/python2.7/pystrtod.h" 
"$(@D)/python_include/pystrtod.h" && cp "/usr/include/python2.7/pythonrun.h" "$(@D)/python_include/pythonrun.h" && cp "/usr/include/python2.7/pythread.h" "$(@D)/python_include/pythread.h" && cp "/usr/include/python2.7/rangeobject.h" "$(@D)/python_include/rangeobject.h" && cp "/usr/include/python2.7/setobject.h" "$(@D)/python_include/setobject.h" && cp "/usr/include/python2.7/sliceobject.h" "$(@D)/python_include/sliceobject.h" && cp "/usr/include/python2.7/stringobject.h" "$(@D)/python_include/stringobject.h" && cp "/usr/include/python2.7/structmember.h" "$(@D)/python_include/structmember.h" && cp "/usr/include/python2.7/structseq.h" "$(@D)/python_include/structseq.h" && cp "/usr/include/python2.7/symtable.h" "$(@D)/python_include/symtable.h" && cp "/usr/include/python2.7/sysmodule.h" "$(@D)/python_include/sysmodule.h" && cp "/usr/include/python2.7/timefuncs.h" "$(@D)/python_include/timefuncs.h" && cp "/usr/include/python2.7/token.h" "$(@D)/python_include/token.h" && cp "/usr/include/python2.7/traceback.h" "$(@D)/python_include/traceback.h" && cp "/usr/include/python2.7/tupleobject.h" "$(@D)/python_include/tupleobject.h" && cp "/usr/include/python2.7/ucnhash.h" "$(@D)/python_include/ucnhash.h" && cp "/usr/include/python2.7/unicodeobject.h" "$(@D)/python_include/unicodeobject.h" && cp "/usr/include/python2.7/warnings.h" "$(@D)/python_include/warnings.h" && cp "/usr/include/python2.7/weakrefobject.h" "$(@D)/python_include/weakrefobject.h" + """, +) + +genrule( + name = "numpy_include", + outs = [ + "numpy_include/numpy/__multiarray_api.h", + "numpy_include/numpy/__ufunc_api.h", + "numpy_include/numpy/_neighborhood_iterator_imp.h", + "numpy_include/numpy/_numpyconfig.h", + "numpy_include/numpy/arrayobject.h", + "numpy_include/numpy/arrayscalars.h", + "numpy_include/numpy/halffloat.h", + "numpy_include/numpy/multiarray_api.txt", + "numpy_include/numpy/ndarrayobject.h", + "numpy_include/numpy/ndarraytypes.h", + "numpy_include/numpy/noprefix.h", + 
"numpy_include/numpy/npy_1_7_deprecated_api.h", + "numpy_include/numpy/npy_3kcompat.h", + "numpy_include/numpy/npy_common.h", + "numpy_include/numpy/npy_cpu.h", + "numpy_include/numpy/npy_endian.h", + "numpy_include/numpy/npy_interrupt.h", + "numpy_include/numpy/npy_math.h", + "numpy_include/numpy/npy_no_deprecated_api.h", + "numpy_include/numpy/npy_os.h", + "numpy_include/numpy/numpyconfig.h", + "numpy_include/numpy/old_defines.h", + "numpy_include/numpy/oldnumeric.h", + "numpy_include/numpy/ufunc_api.txt", + "numpy_include/numpy/ufuncobject.h", + "numpy_include/numpy/utils.h", + ], + cmd = """ +cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/__multiarray_api.h" "$(@D)/numpy_include/numpy/__multiarray_api.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/__ufunc_api.h" "$(@D)/numpy_include/numpy/__ufunc_api.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/_neighborhood_iterator_imp.h" "$(@D)/numpy_include/numpy/_neighborhood_iterator_imp.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/_numpyconfig.h" "$(@D)/numpy_include/numpy/_numpyconfig.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/arrayobject.h" "$(@D)/numpy_include/numpy/arrayobject.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/arrayscalars.h" "$(@D)/numpy_include/numpy/arrayscalars.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/halffloat.h" "$(@D)/numpy_include/numpy/halffloat.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/multiarray_api.txt" "$(@D)/numpy_include/numpy/multiarray_api.txt" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/ndarrayobject.h" "$(@D)/numpy_include/numpy/ndarrayobject.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/ndarraytypes.h" "$(@D)/numpy_include/numpy/ndarraytypes.h" && cp 
"/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/noprefix.h" "$(@D)/numpy_include/numpy/noprefix.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h" "$(@D)/numpy_include/numpy/npy_1_7_deprecated_api.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_3kcompat.h" "$(@D)/numpy_include/numpy/npy_3kcompat.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_common.h" "$(@D)/numpy_include/numpy/npy_common.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_cpu.h" "$(@D)/numpy_include/numpy/npy_cpu.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_endian.h" "$(@D)/numpy_include/numpy/npy_endian.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_interrupt.h" "$(@D)/numpy_include/numpy/npy_interrupt.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_math.h" "$(@D)/numpy_include/numpy/npy_math.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_no_deprecated_api.h" "$(@D)/numpy_include/numpy/npy_no_deprecated_api.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_os.h" "$(@D)/numpy_include/numpy/npy_os.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/numpyconfig.h" "$(@D)/numpy_include/numpy/numpyconfig.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/old_defines.h" "$(@D)/numpy_include/numpy/old_defines.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/oldnumeric.h" "$(@D)/numpy_include/numpy/oldnumeric.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/ufunc_api.txt" "$(@D)/numpy_include/numpy/ufunc_api.txt" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/ufuncobject.h" "$(@D)/numpy_include/numpy/ufuncobject.h" && cp 
"/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/utils.h" "$(@D)/numpy_include/numpy/utils.h" + """, +) -- GitLab From cf11a4cb47cb550cc6a1de5e5eb4394a9d949e09 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Fri, 2 Mar 2018 11:15:14 -0800 Subject: [PATCH 263/311] [XLA] Support while loops and constant in HLO BF16 propagation. PiperOrigin-RevId: 187644155 --- tensorflow/compiler/xla/literal_util.cc | 18 + tensorflow/compiler/xla/literal_util.h | 5 + tensorflow/compiler/xla/service/BUILD | 2 + .../xla/service/bfloat16_propagation.cc | 390 ++++++++++++++---- .../xla/service/bfloat16_propagation.h | 41 +- .../xla/service/bfloat16_propagation_test.cc | 227 ++++++++++ 6 files changed, 598 insertions(+), 85 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index a345e95a8b..1d1418fc2f 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -1434,6 +1434,24 @@ StatusOr> Literal::Convert( } } +StatusOr> Literal::ConvertToShape( + const Shape& dest_shape) const { + if (!ShapeUtil::IsTuple(dest_shape)) { + return Convert(dest_shape.element_type()); + } + std::vector elements; + for (int i = 0; i < ShapeUtil::TupleElementCount(shape()); ++i) { + auto element = LiteralView::Create(*this, {i}); + TF_ASSIGN_OR_RETURN( + auto new_element, + element.ConvertToShape(ShapeUtil::GetSubshape(dest_shape, {i}))); + elements.push_back(std::move(*new_element)); + } + auto converted = MakeUnique(); + *converted = Literal::MoveIntoTuple(&elements); + return std::move(converted); +} + template bool Literal::Piece::EqualElementsInternal( const Literal::Piece& other, std::vector* multi_index) const { diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index 1d58f0cbc7..cdc5d807e0 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -333,6 +333,11 @@ class Literal { StatusOr> Convert( 
PrimitiveType primitive_dest_type) const; + // Converts this literal to the given shape. Returns an error if the + // conversion is not possible. + StatusOr<std::unique_ptr<Literal>> ConvertToShape( + const Shape& dest_shape) const; + // Creates a scalar literal value zero of the given primitive type. static Literal Zero(PrimitiveType primitive_type); diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index e4ae812532..d71790fb2d 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -129,6 +129,7 @@ cc_library( ":hlo_dce", ":hlo_pass", ":tuple_simplifier", + "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_tree", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:util", @@ -148,6 +149,7 @@ tf_cc_test( "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep ], ) diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc index 6145c690b9..7708504dc9 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/bfloat16_propagation.h" +#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_dce.h" @@ -68,33 +69,53 @@ void BFloat16Propagation::DetermineAndMutateFusionComputationPrecision( for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { DetermineAndMutateInstructionPrecision(*inst_it, /*skip_parameters=*/false); } + computations_visited_in_mutation_pass_.insert( + fusion->fused_instructions_computation()); } -void BFloat16Propagation::AdjustFusionParameters(HloInstruction* fusion) { - CHECK_EQ(fusion->fused_parameters().size(), fusion->operand_count()); - for (int64 i = 0; i < fusion->operand_count(); ++i) { - auto parameter = fusion->fused_parameter(i); - ShapeUtil::ForEachMutableSubshape( - parameter->mutable_shape(), - [&](Shape* subshape, const ShapeIndex& index) { - if (!ShapeUtil::IsLeafIndex(parameter->shape(), index)) { - return; - } - PrimitiveType operand_type = - ShapeUtil::GetSubshape(fusion->operand(i)->shape(), index) - .element_type(); - if (subshape->element_type() == operand_type) { - return; - } - CHECK(operand_type == F32 || operand_type == BF16); - subshape->set_element_type(operand_type); +void BFloat16Propagation::DetermineAndMutateWhileComputationsPrecision( + HloInstruction* while_hlo) { + CHECK_EQ(while_hlo->opcode(), HloOpcode::kWhile); + + // We are depending on the while node itself having already been analyzed for + // whether it can output BF16 and this has been adjusted in the output shape, + // and now we're looking to update the body and condition computations to + // match the new output shape, as well as recursively process the whole while + // node even if the output shape was not modified. 
+ HloComputation* body = while_hlo->while_body(); + auto body_root = body->root_instruction(); + HloComputation* condition = while_hlo->while_condition(); + + ShapeUtil::ForEachMutableSubshape( + body_root->mutable_shape(), + [this, while_hlo, body_root](Shape* subshape, const ShapeIndex& index) { + if (subshape->element_type() != F32) { + return; + } + if (ShapeUtil::GetSubshape(while_hlo->shape(), index).element_type() == + BF16) { + subshape->set_element_type(BF16); changed_ = true; - VLOG(2) << "Fused parameter " << parameter->ToString() + VLOG(2) << "While body root " << body_root->ToString() << " at shape index " << index - << " adjusted to match operand in fusion " - << fusion->ToString(); - }); + << " changed to BF16 precision for while " + << while_hlo->ToString(); + } + }); + + auto body_insts = body->MakeInstructionPostOrder(); + for (auto inst_it = body_insts.rbegin(); inst_it != body_insts.rend(); + ++inst_it) { + DetermineAndMutateInstructionPrecision(*inst_it, /*skip_parameters=*/false); } + computations_visited_in_mutation_pass_.insert(body); + + auto condition_insts = condition->MakeInstructionPostOrder(); + for (auto inst_it = condition_insts.rbegin(); + inst_it != condition_insts.rend(); ++inst_it) { + DetermineAndMutateInstructionPrecision(*inst_it, /*skip_parameters=*/false); + } + computations_visited_in_mutation_pass_.insert(condition); } bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo, @@ -108,14 +129,45 @@ bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo, continue; } for (const HloUse& use : value->uses()) { + if (!ContainsKey(instructions_visited_in_mutation_pass_, + use.instruction)) { + // We don't know yet whether use.instruction will consume BF16 since it + // hasn't been visited. Although we visit instructions in reverse + // topological order, this is still possible because there may be + // unvisited instruction that alias the same buffer. 
In this case, we + // aggressively skip this use, and if this causes inconsistency (e.g., + // one use is in BF16 but another use is in F32), it will be resolved at + // the end of the BFloat16Propagation pass. + continue; + } + // Any visited user that can accept BF16 has already been updated if + // necessary, e.g., the output has been changed to BF16 if it propagates + // precision, or a called computation's parameters have been changed to + // BF16 for fusions or whiles. if (use.instruction->opcode() == HloOpcode::kFusion) { - auto fused_parameter = + const auto* fused_parameter = use.instruction->fused_parameter(use.operand_number); if (ShapeUtil::GetSubshape(fused_parameter->shape(), use.operand_index) .element_type() != BF16) { return false; } continue; + } else if (use.instruction->opcode() == HloOpcode::kWhile) { + const auto* cond_parameter = + use.instruction->while_condition()->parameter_instruction( + use.operand_number); + if (ShapeUtil::GetSubshape(cond_parameter->shape(), use.operand_index) + .element_type() != BF16) { + return false; + } + const auto* body_parameter = + use.instruction->while_body()->parameter_instruction( + use.operand_number); + if (ShapeUtil::GetSubshape(body_parameter->shape(), use.operand_index) + .element_type() != BF16) { + return false; + } + continue; } if (bfloat16_support_->EffectiveOperandPrecisionIsBF16( *use.instruction, use.operand_number)) { @@ -149,24 +201,36 @@ bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo, void BFloat16Propagation::DetermineAndMutateInstructionPrecision( HloInstruction* hlo, bool skip_parameters) { - // We handle any fusion computation after the instruction is handled, because - // we need to know a fusion's output shape before propagating inside its fused - // computation. 
- auto cleaner = tensorflow::gtl::MakeCleanup([this, hlo] { - if (hlo->opcode() == HloOpcode::kFusion) { - DetermineAndMutateFusionComputationPrecision(hlo); - } - }); + // We handle any fusion computation or while body/condition after the + // instruction is handled, because we need to know the output shape of a + // fusion or while before propagating inside its computations. + bool postpone_processing_called_computations = false; + auto cleaner = tensorflow::gtl::MakeCleanup( + [this, hlo, &postpone_processing_called_computations] { + if (!postpone_processing_called_computations) { + if (hlo->opcode() == HloOpcode::kFusion) { + DetermineAndMutateFusionComputationPrecision(hlo); + } else if (hlo->opcode() == HloOpcode::kWhile) { + DetermineAndMutateWhileComputationsPrecision(hlo); + } + } + instructions_visited_in_mutation_pass_.insert(hlo); + }); + + if (hlo->opcode() == HloOpcode::kWhile && + (caller_counts_[hlo->while_condition()] > 1 || + caller_counts_[hlo->while_body()] > 1)) { + postpone_processing_called_computations = true; + return; + } // Do not change precision for instructions related to entry and exit of a // computation, and control flow, because this pass might break the interfaces // or assumptions for them. 
if (hlo->opcode() == HloOpcode::kInfeed || // hlo->opcode() == HloOpcode::kOutfeed || // - hlo->opcode() == HloOpcode::kConstant || // hlo->opcode() == HloOpcode::kCustomCall || // hlo->opcode() == HloOpcode::kCall || // - hlo->opcode() == HloOpcode::kWhile || // hlo->opcode() == HloOpcode::kConditional || // (hlo->opcode() == HloOpcode::kParameter && skip_parameters)) { return; @@ -231,60 +295,198 @@ bool BFloat16Propagation::InstructionIsCandidateForBF16Output( return true; } -Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( - HloModule* module) { - std::list computations_topological_order = - module->MakeComputationPostOrder(); - for (auto comp_it = computations_topological_order.rbegin(); - comp_it != computations_topological_order.rend(); ++comp_it) { - auto insts = (*comp_it)->MakeInstructionPostOrder(); - // Do the adjustment on each instruction in the computation in reverse - // topological order. - for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { - auto hlo = *inst_it; - auto adjust_buffer = [this, hlo](Shape* subshape, - const ShapeIndex& index) { - if (subshape->element_type() != F32 && - subshape->element_type() != BF16) { - return; +void BFloat16Propagation::AdjustCalledComputationParameters( + HloInstruction* hlo) { + auto adjust_computation = + [this, hlo](HloComputation* computation, + tensorflow::gtl::ArraySlice operands) { + // Adjust parameters. 
+ CHECK_EQ(operands.size(), computation->num_parameters()); + for (int64 i = 0; i < operands.size(); ++i) { + auto parameter = computation->parameter_instruction(i); + ShapeUtil::ForEachMutableSubshape( + parameter->mutable_shape(), + [this, i, hlo, &operands, parameter](Shape* subshape, + const ShapeIndex& index) { + if (!ShapeUtil::IsLeafIndex(parameter->shape(), index)) { + return; + } + PrimitiveType operand_type = + ShapeUtil::GetSubshape(operands[i]->shape(), index) + .element_type(); + if (subshape->element_type() == operand_type) { + return; + } + CHECK(operand_type == F32 || operand_type == BF16); + subshape->set_element_type(operand_type); + changed_ = true; + VLOG(2) << "Called computation parameter " + << parameter->ToString() << " at shape index " << index + << " adjusted to match operand in HLO " + << hlo->ToString(); + }); } - PrimitiveType type = BF16; - for (const auto* value : dataflow_->GetValueSet(hlo, index).values()) { - if (value->shape().element_type() == BF16) { - continue; + }; + + switch (hlo->opcode()) { + case HloOpcode::kFusion: + adjust_computation(hlo->fused_instructions_computation(), + hlo->operands()); + break; + case HloOpcode::kWhile: + adjust_computation(hlo->while_condition(), hlo->operands()); + adjust_computation(hlo->while_body(), hlo->operands()); + break; + default: + break; + } +} + +void BFloat16Propagation::AdjustCalledComputationRoot(HloInstruction* hlo) { + auto adjust_computation = [this, hlo](HloComputation* computation, + const Shape& output_shape) { + // Adjust root. 
+ HloInstruction* root = computation->root_instruction(); + ShapeUtil::ForEachMutableSubshape( + root->mutable_shape(), [this, hlo, root, &output_shape]( + Shape* subshape, const ShapeIndex& index) { + if (!ShapeUtil::IsLeafIndex(hlo->shape(), index)) { + return; } - CHECK_EQ(value->shape().element_type(), F32); - type = F32; - break; - } - // It's possible that a user has been changed from BF16 to F32 - // during this final adjustment pass, so we need to check - // AllUsersConsumeBF16() again. - if (type == BF16 && !AllUsersConsumeBF16(*hlo, index)) { - type = F32; - } - if (type == F32) { - for (const auto* value : - dataflow_->GetValueSet(hlo, index).values()) { - // We rely on the fact that this adjustment works in reverse - // topological order. Adding the value to - // values_that_must_be_kept_as_f32_ will ensure the correctness - // of the adjustment for HLOs that will be processed later. - values_that_must_be_kept_as_f32_.insert(value); + const PrimitiveType output_type = + ShapeUtil::GetSubshape(output_shape, index).element_type(); + if (subshape->element_type() == output_type) { + return; + } + CHECK(output_type == F32 || output_type == BF16); + subshape->set_element_type(output_type); + // It's possible that output_type is F32, but the root instruction's + // type is BF16; e.g., a fusion node's output was changed to BF16 + // initially but then adjusted back to F32, and the fusion computation + // is now being adjusted after the fusion node. + if (output_type == F32) { + for (const auto* value : + dataflow_->GetValueSet(root, index).values()) { + // We rely on the fact that this adjustment works in reverse + // topological order so that called computation will be + // processed later. Adding the value to + // values_that_must_be_kept_as_f32_ will ensure the + // correctness of the adjustment for HLOs that will be + // processed later. 
+ values_that_must_be_kept_as_f32_.insert(value); + } } + changed_ = true; + VLOG(2) << "Called computation root " << root->ToString() + << " at shape index " << index + << " adjusted to match output shape of " << hlo->ToString(); + }); + }; + + switch (hlo->opcode()) { + case HloOpcode::kFusion: + adjust_computation(hlo->fused_instructions_computation(), hlo->shape()); + break; + case HloOpcode::kWhile: + adjust_computation(hlo->while_condition(), hlo->shape()); + adjust_computation(hlo->while_body(), hlo->shape()); + break; + default: + break; + } +} + +bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper( + HloComputation* computation, + tensorflow::gtl::FlatSet* visited_computations) { + bool parameter_changed = false; + auto insts = computation->MakeInstructionPostOrder(); + // Do the adjustment on each instruction in the computation in reverse + // topological order. + for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { + auto hlo = *inst_it; + auto adjust_hlo_output = [this, hlo, &parameter_changed]( + Shape* subshape, const ShapeIndex& index) { + if (subshape->element_type() != F32 && subshape->element_type() != BF16) { + return; + } + PrimitiveType type = BF16; + for (const auto* value : dataflow_->GetValueSet(hlo, index).values()) { + if (value->shape().element_type() == BF16) { + continue; } + CHECK_EQ(value->shape().element_type(), F32); + type = F32; + break; + } + // It's possible that a user has been changed from BF16 to F32 + // during this final adjustment pass, so we need to check + // AllUsersConsumeBF16() again. + if (type == BF16 && !AllUsersConsumeBF16(*hlo, index)) { + type = F32; + } + if (type == F32) { + for (const auto* value : dataflow_->GetValueSet(hlo, index).values()) { + // We rely on the fact that this adjustment works in reverse + // topological order. Adding the value to + // values_that_must_be_kept_as_f32_ will ensure the correctness + // of the adjustment for HLOs that will be processed later. 
+ values_that_must_be_kept_as_f32_.insert(value); + } + } + if (type != subshape->element_type()) { subshape->set_element_type(type); - }; - ShapeUtil::ForEachMutableSubshape(hlo->mutable_shape(), adjust_buffer); - } - // Now adjust parameters of fusions inside this computation. - for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { - auto hlo = *inst_it; - if (hlo->opcode() == HloOpcode::kFusion) { - AdjustFusionParameters(hlo); + VLOG(2) << "HloInstruction output at shape index " << index + << " adjusted to " << *subshape << ": " << hlo->ToString(); + if (hlo->opcode() == HloOpcode::kParameter) { + parameter_changed = true; + } + } + }; + ShapeUtil::ForEachMutableSubshape(hlo->mutable_shape(), adjust_hlo_output); + AdjustCalledComputationRoot(hlo); + if (hlo->opcode() == HloOpcode::kWhile) { + // We need to run on the while body and condition repeatedly until a fixed + // point is reached, i.e., the parameters do not change any more. We may + // need more than one iteration because the while input and output alias + // each other, so changing one input parameter requires changing the + // corresponding output element and thus may transitively require changing + // another input parameter. A fixed point will be reached because the + // parameters can only be changed from BF16 to F32, not the other way + // around. + tensorflow::gtl::FlatSet visited_in_while; + while (ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_condition(), + &visited_in_while) || + ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_body(), + &visited_in_while)) { + visited_in_while.clear(); + ShapeUtil::ForEachMutableSubshape(hlo->mutable_shape(), + adjust_hlo_output); + AdjustCalledComputationRoot(hlo); } + visited_computations->insert(visited_in_while.begin(), + visited_in_while.end()); } } + // Now adjust parameters of called computations. 
+ for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { + AdjustCalledComputationParameters(*inst_it); + } + return parameter_changed; +} + +Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( + HloModule* module) { + std::list computations_topological_order = + module->MakeComputationPostOrder(); + tensorflow::gtl::FlatSet resolved; + for (auto comp_it = computations_topological_order.rbegin(); + comp_it != computations_topological_order.rend(); ++comp_it) { + if (ContainsKey(resolved, *comp_it)) { + continue; + } + ResolveInconsistencyOfAliasingBuffersHelper(*comp_it, &resolved); + } // We could have changed a fusion computation's root shape to have a different // precision than the fusion node's output, if the fusion root does not @@ -382,9 +584,39 @@ Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( needs_tuple_simplifier |= ShapeUtil::IsTuple(hlo->shape()); } } + + // We may have converted some constants from F32 to BF16, so adjust the + // constant literals in such cases. We do this here instead of when the + // constant node's shape is changed because 1) the HloInstruction interface does not + // allow resetting the literal so we have to create a new kConstant + // instruction to replace the old one, which invalidates dataflow analysis, + // and 2) it's possible that a kConstant's output gets changed to BF16 at the + // beginning but later on adjusted back to F32, so converting literals here + // can avoid repeated conversions. + // + // TODO(b/73833576): Consider resetting literal in HloInstruction. 
+ bool needs_dce = needs_tuple_simplifier; + for (auto computation : computations_topological_order) { + for (auto hlo : computation->MakeInstructionPostOrder()) { + if (hlo->opcode() != HloOpcode::kConstant) { + continue; + } + if (!ShapeUtil::Equal(hlo->literal().shape(), hlo->shape())) { + TF_ASSIGN_OR_RETURN(auto converted_literal, + hlo->literal().ConvertToShape(hlo->shape())); + auto new_constant = computation->AddInstruction( + HloInstruction::CreateConstant(std::move(converted_literal))); + TF_RETURN_IF_ERROR(hlo->ReplaceAllUsesWith(new_constant)); + needs_dce = true; + } + } + } + if (needs_tuple_simplifier) { TupleSimplifier tuple_simplifier; TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status()); + } + if (needs_dce) { HloDCE dce; TF_RETURN_IF_ERROR(dce.Run(module).status()); } diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h index ccf77d7b4e..89a5ac5db1 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.h +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h @@ -38,7 +38,8 @@ namespace xla { // be bitwise identical to that without this pass; this is possible if the // backend already reduces precision to BF16 on some HLO instructions. // -// This pass will not modify the signature of any non-fusion computation. +// This pass will not modify the signature of a computation, unless it is a +// fusion computation or its only caller is a while. // // !!! WARNING !!! This pass can introduce mixed precision in individual HLOs, // which has two issues: @@ -92,8 +93,23 @@ class BFloat16Propagation : public HloPassInterface { bool skip_parameters); // Special handling in the mutation pass for fusion computations. + // + // Precondition: hlo->opcode() == kFusion void DetermineAndMutateFusionComputationPrecision(HloInstruction* fusion); + // Special handling in the mutation pass for while computations. 
+ // + // Precondition: hlo->opcode() == kWhile + void DetermineAndMutateWhileComputationsPrecision(HloInstruction* while_hlo); + + // The set of HloInstructions that have been visited in the mutation pass. + tensorflow::gtl::FlatSet + instructions_visited_in_mutation_pass_; + + // The set of HloComputations that have been visited in the mutation pass. + tensorflow::gtl::FlatSet + computations_visited_in_mutation_pass_; + // *************************** // Functions called by the final inconsistency resolving pass. @@ -102,9 +118,20 @@ class BFloat16Propagation : public HloPassInterface { // same precision. Status ResolveInconsistencyOfAliasingBuffers(HloModule* module); - // Makes the fusion parameters match the precision of the actual parameters - // passed to the fusion node. - void AdjustFusionParameters(HloInstruction* fusion); + // Resolves inconsistency of aliasing buffers for the given computation, and + // recursively runs on a while instruction's condition and body until a fixed + // point is reached. + bool ResolveInconsistencyOfAliasingBuffersHelper( + HloComputation* computation, + tensorflow::gtl::FlatSet* visited_computations); + + // Makes the parameters of called computations match how they are called by + // the given HLO. + void AdjustCalledComputationParameters(HloInstruction* hlo); + + // Makes the root instructions of called computations match how they are used + // by the given HLO. + void AdjustCalledComputationRoot(HloInstruction* hlo); // *************************** // Functions called and state used by two or more passes. @@ -117,8 +144,10 @@ class BFloat16Propagation : public HloPassInterface { // The set of F32 HLO values that must be kept in F32. tensorflow::gtl::FlatSet values_that_must_be_kept_as_f32_; - // *************************** - // State used by both passes. + // Mapping from each HloComputation to the number of callers to it in the + // module. Populated at the beginning of this pass. 
+ tensorflow::gtl::FlatMap caller_counts_; + const BFloat16Support* bfloat16_support_; std::unique_ptr dataflow_; diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc index 2047e2053a..5950b004b3 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" namespace xla { @@ -121,6 +122,41 @@ TEST_F(BFloat16PropagationTest, PropagateThroughSelectButNotAdd) { EXPECT_FALSE(OutputsBF16(c)); } +// Tests that if a constant is converted to BF16 then its literal must also be +// converted. +TEST_F(BFloat16PropagationTest, ConvertConstantLiteral) { + auto builder = HloComputation::Builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {4, 4}); + Array2D array_a(4, 4); + array_a.FillUnique(1.0f); + Array2D array_b(4, 4); + array_b.FillUnique(10.0f); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateFromArray(array_a))); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateFromArray(array_b))); + HloInstruction* dot = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, a, b)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(PropagatePrecision(module.get())); + + EXPECT_EQ(computation->root_instruction(), dot); + EXPECT_TRUE(OutputsBF16(dot->operand(0))); + EXPECT_TRUE(OutputsBF16(dot->operand(1))); + EXPECT_EQ(dot->operand(0)->opcode(), HloOpcode::kConstant); + EXPECT_EQ(dot->operand(1)->opcode(), HloOpcode::kConstant); 
+ LiteralTestUtil::ExpectEqual( + dot->operand(0)->literal(), + *LiteralTestUtil::ConvertF32ToBF16(*Literal::CreateFromArray(array_a))); + LiteralTestUtil::ExpectEqual( + dot->operand(1)->literal(), + *LiteralTestUtil::ConvertF32ToBF16(*Literal::CreateFromArray(array_b))); +} + // Tests that BF16 can be propagated through nested tuples. TEST_F(BFloat16PropagationTest, PropagateThroughTuples) { auto builder = HloComputation::Builder(TestName()); @@ -390,4 +426,195 @@ TEST_F(BFloat16PropagationTest, SelectOverTuples) { EXPECT_TRUE(OutputsBF16(xpose)); } +// Tests that BF16 is propagated properly through while computations. +TEST_F(BFloat16PropagationTest, PropagateThroughWhile) { + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {4, 4}); + + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param0")); + HloInstruction* param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, shape, "param1")); + HloInstruction* add0 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* add1 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* tuple = + builder.AddInstruction(HloInstruction::CreateTuple({add0, add1})); + + auto builder_cond = HloComputation::Builder("cond"); + auto cond_param = builder_cond.AddInstruction( + HloInstruction::CreateParameter(0, tuple->shape(), "cond_param")); + auto cond_lhs = builder_cond.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond_param, 0)); + auto cond_rhs = builder_cond.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond_param, 1)); + // This add should prevent RHS from using BF16 + auto cond_add_rhs = builder_cond.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, cond_rhs, cond_rhs)); + auto cond_dot = 
builder_cond.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, cond_lhs, cond_add_rhs)); + builder_cond.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(PRED, {}), HloOpcode::kGt, + builder_cond.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond_dot, {0, 0}, {1, 1}, {1, 1})), + builder_cond.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond_dot, {1, 1}, {2, 2}, {1, 1})))); + auto cond = module->AddEmbeddedComputation(builder_cond.Build()); + + auto builder_body = HloComputation::Builder("body"); + auto body_param = builder_body.AddInstruction( + HloInstruction::CreateParameter(0, tuple->shape(), "body_param")); + auto body_lhs = builder_body.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, body_param, 0)); + auto body_rhs = builder_body.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, body_param, 1)); + auto body_dot = builder_body.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, body_lhs, body_rhs)); + builder_body.AddInstruction( + HloInstruction::CreateTuple({body_dot, body_rhs})); + auto body = module->AddEmbeddedComputation(builder_body.Build()); + + auto while_hlo = builder.AddInstruction( + HloInstruction::CreateWhile(tuple->shape(), cond, body, tuple)); + + auto lhs = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while_hlo, 0)); + auto rhs = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while_hlo, 1)); + auto dot = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, lhs, rhs)); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(PropagatePrecision(module.get())); + + EXPECT_EQ(computation->root_instruction(), dot); + EXPECT_TRUE(OutputsBF16(lhs)); + EXPECT_FALSE(OutputsBF16(rhs)); + EXPECT_TRUE(OutputsBF16(body_dot)); + EXPECT_TRUE(OutputsBF16(body_lhs)); + EXPECT_FALSE(OutputsBF16(body_rhs)); + 
EXPECT_TRUE(OutputsBF16(cond_lhs)); + EXPECT_FALSE(OutputsBF16(cond_rhs)); + EXPECT_TRUE(OutputsBF16(add0)); + EXPECT_FALSE(OutputsBF16(add1)); +} + +// Tests that BF16 is not propagated through multiple whiles that invoke the +// same computation as long as one while prevents the propagation. +TEST_F(BFloat16PropagationTest, DoNotPropagateWhilesCallingSameComputation) { + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {4, 4}); + + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param0")); + HloInstruction* param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, shape, "param1")); + HloInstruction* add0 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* add1 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* add2 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* add3 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* tuple0 = + builder.AddInstruction(HloInstruction::CreateTuple({add0, add1})); + HloInstruction* tuple1 = + builder.AddInstruction(HloInstruction::CreateTuple({add2, add3})); + + // Condition computation for the first while. 
+ auto builder_cond0 = HloComputation::Builder("cond0"); + auto cond0_param = builder_cond0.AddInstruction( + HloInstruction::CreateParameter(0, tuple0->shape(), "cond0_param")); + auto cond0_lhs = builder_cond0.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond0_param, 0)); + auto cond0_rhs = builder_cond0.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond0_param, 1)); + // This add should prevent RHS from using BF16 + auto cond0_add_rhs = + builder_cond0.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kAdd, cond0_rhs, cond0_rhs)); + auto cond0_dot = builder_cond0.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, cond0_lhs, cond0_add_rhs)); + builder_cond0.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(PRED, {}), HloOpcode::kGt, + builder_cond0.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond0_dot, {0, 0}, {1, 1}, {1, 1})), + builder_cond0.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond0_dot, {1, 1}, {2, 2}, {1, 1})))); + auto cond0 = module->AddEmbeddedComputation(builder_cond0.Build()); + + // Condition computation for the second while. 
+ auto builder_cond1 = HloComputation::Builder("cond1"); + auto cond1_param = builder_cond1.AddInstruction( + HloInstruction::CreateParameter(0, tuple1->shape(), "cond1_param")); + auto cond1_lhs = builder_cond1.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond1_param, 0)); + auto cond1_rhs = builder_cond1.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond1_param, 1)); + // This add should prevent LHS from using BF16 + auto cond1_add_lhs = + builder_cond1.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kAdd, cond1_lhs, cond1_lhs)); + auto cond1_dot = builder_cond1.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, cond1_add_lhs, cond1_rhs)); + builder_cond1.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(PRED, {}), HloOpcode::kGt, + builder_cond1.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond1_dot, {0, 0}, {1, 1}, {1, 1})), + builder_cond1.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond1_dot, {1, 1}, {2, 2}, {1, 1})))); + auto cond1 = module->AddEmbeddedComputation(builder_cond1.Build()); + + // Body computation shared by both whiles. 
+ auto builder_body = HloComputation::Builder("body"); + auto body_param = builder_body.AddInstruction( + HloInstruction::CreateParameter(0, tuple0->shape(), "body_param")); + auto body_lhs = builder_body.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, body_param, 0)); + auto body_rhs = builder_body.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, body_param, 1)); + auto body_dot = builder_body.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, body_lhs, body_rhs)); + builder_body.AddInstruction( + HloInstruction::CreateTuple({body_dot, body_rhs})); + auto body = module->AddEmbeddedComputation(builder_body.Build()); + + auto while0 = builder.AddInstruction( + HloInstruction::CreateWhile(tuple0->shape(), cond0, body, tuple0)); + auto while1 = builder.AddInstruction( + HloInstruction::CreateWhile(tuple1->shape(), cond1, body, tuple1)); + + auto lhs = builder.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, + builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while0, 0)), + builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while0, 1)))); + auto rhs = builder.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, + builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while1, 0)), + builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while1, 1)))); + auto dot = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, lhs, rhs)); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(PropagatePrecision(module.get())); + EXPECT_FALSE(OutputsBF16(body_dot)); + EXPECT_FALSE(OutputsBF16(body_rhs)); + EXPECT_FALSE(OutputsBF16(body_lhs)); + EXPECT_FALSE(OutputsBF16(cond0_lhs)); + EXPECT_FALSE(OutputsBF16(cond0_rhs)); + EXPECT_FALSE(OutputsBF16(cond1_lhs)); + EXPECT_FALSE(OutputsBF16(cond1_rhs)); + EXPECT_TRUE(OutputsBF16(cond0_add_rhs)); + 
EXPECT_TRUE(OutputsBF16(cond1_add_lhs)); + EXPECT_EQ(computation->root_instruction(), dot); +} + } // namespace xla -- GitLab From 3fb65ed8667df659ea8634a7e142e989cecea9f8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 11:18:41 -0800 Subject: [PATCH 264/311] Add a configurable preference for scheduling fuller batches sooner to the adaptive shared batcher. A full batch will now be scheduled before an older, nearly empty batch as long as the age gap is less than full_batch_scheduling_boost_micros. This parameter improves latency under heavy load, but too large a value will harm tail latency. PiperOrigin-RevId: 187644796 --- .../adaptive_shared_batch_scheduler.h | 61 +++++++++------- .../adaptive_shared_batch_scheduler_test.cc | 71 +++++++++++++++++++ 2 files changed, 107 insertions(+), 25 deletions(-) diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h index 661ed239d3..339d792302 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h @@ -19,7 +19,6 @@ limitations under the License. #include #include #include -#include #include #include #include @@ -44,15 +43,14 @@ template class ASBSQueue; } // namespace internal -// EXPERIMENTAL: API MAY BE SUBJECTED TO SUDDEN CHANGES. -// // Shared batch scheduler designed to minimize latency. The scheduler keeps // track of a number of queues (one per model or model version) which are // continuously enqueuing requests. The scheduler groups the requests into // batches which it periodically sends off for processing (see // shared_batch_scheduler.h for more details). AdaptiveSharedBatchScheduler -// (ASBS) prioritizes batches by age (i.e. the batch's oldest request) -// irrespective of queue or batch size. +// (ASBS) prioritizes batches primarily by age (i.e. 
the batch's oldest request) +// along with a configurable preference for scheduling larger batches first. +// // // ASBS tries to keep the system busy by maintaining an adjustable number of // concurrently processed batches. If a new batch is created, and the number of @@ -93,6 +91,13 @@ class AdaptiveSharedBatchScheduler // for num_batch_threads allows for large in_flight_batches_limit_, which // will harm latency for some time once load increases again. int64 num_batch_threads = port::NumSchedulableCPUs(); + // Although batch selection is primarily based on age, this parameter + // specifies a preference for larger batches. A full batch will be + // scheduled before an older, nearly empty batch as long as the age gap is + // less than full_batch_scheduling_boost_micros. The optimal value for this + // parameter should be of order the batch processing latency, but must be + // chosen carefully, as too large a value will harm tail latency. + int64 full_batch_scheduling_boost_micros = 0; // The environment to use (typically only overridden by test code). Env* env = Env::Default(); // Initial limit for number of batches being concurrently processed. @@ -153,17 +158,9 @@ class AdaptiveSharedBatchScheduler const Options options_; - struct BatchCompare { - bool operator()(const internal::ASBSBatch* a, - const internal::ASBSBatch* b); - }; - // Collection of batches added by AddBatch, ordered by age. Owned by scheduler // until they are released for processing. - std::priority_queue*, - std::vector*>, - BatchCompare> - batches_ GUARDED_BY(mu_); + std::vector*> batches_ GUARDED_BY(mu_); // Unowned queues and callbacks added by AddQueue. 
std::unordered_map*, BatchProcessor> @@ -288,6 +285,11 @@ Status AdaptiveSharedBatchScheduler::Create( return errors::InvalidArgument("num_batch_threads must be positive; was ", options.num_batch_threads); } + if (options.full_batch_scheduling_boost_micros < 0) { + return errors::InvalidArgument( + "full_batch_scheduling_boost_micros can't be negative; was ", + options.full_batch_scheduling_boost_micros); + } if (options.initial_in_flight_batches_limit > options.num_batch_threads) { return errors::InvalidArgument( "initial_in_flight_batches_limit (", @@ -348,7 +350,7 @@ template void AdaptiveSharedBatchScheduler::AddBatch( const internal::ASBSBatch* batch) { mutex_lock l(mu_); - batches_.push(batch); + batches_.push_back(batch); MaybeScheduleNextBatch(); } @@ -366,10 +368,26 @@ void AdaptiveSharedBatchScheduler::MaybeScheduleNextBatch() { // Non-integer limit handled probabilistially. if (in_flight_batches_limit_ - in_flight_batches_ < 1 && rand_double_(rand_engine_) > - (in_flight_batches_limit_ - in_flight_batches_)) + in_flight_batches_limit_ - in_flight_batches_) { return; - const internal::ASBSBatch* batch = batches_.top(); - batches_.pop(); + } + auto best_it = batches_.begin(); + double best_score = + (*best_it)->creation_time_micros() - + options_.full_batch_scheduling_boost_micros * (*best_it)->size() / + static_cast((*best_it)->queue()->max_task_size()); + for (auto it = batches_.begin() + 1; it != batches_.end(); it++) { + const double score = + (*it)->creation_time_micros() - + options_.full_batch_scheduling_boost_micros * (*it)->size() / + static_cast((*it)->queue()->max_task_size()); + if (score < best_score) { + best_score = score; + best_it = it; + } + } + const internal::ASBSBatch* batch = *best_it; + batches_.erase(best_it); // Queue may destroy itself after ReleaseBatch is called. 
batch->queue()->ReleaseBatch(batch); batch_thread_pool_->Schedule( @@ -427,13 +445,6 @@ void AdaptiveSharedBatchScheduler::CallbackWrapper( MaybeScheduleNextBatch(); } -template -bool AdaptiveSharedBatchScheduler::BatchCompare::operator()( - const internal::ASBSBatch* a, - const internal::ASBSBatch* b) { - return a->creation_time_micros() > b->creation_time_micros(); -} - // ---------------- ASBSQueue ---------------- namespace internal { diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc index 109234287e..1be0c1f5c6 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc @@ -180,6 +180,77 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) { stop_teardown.Notify(); } +TEST(AdaptiveSharedBatchSchedulerTest, FullBatchSchedulingBoostMicros) { + test_util::FakeClockEnv env(Env::Default()); + Notification start_teardown, stop_teardown; + std::unique_ptr teardown_thread = + CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); + { + AdaptiveSharedBatchScheduler::Options options; + options.env = &env; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + options.full_batch_scheduling_boost_micros = 100; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + finish_processing.WaitForNotification(); + mutex_lock l(mu); + processed_batches++; + switch (processed_batches) { + case 1: + EXPECT_EQ(100, batch->size()); + break; + case 2: + EXPECT_EQ(50, batch->size()); + break; + case 3: + EXPECT_EQ(900, batch->size()); + break; + case 4: + EXPECT_EQ(200, batch->size()); + break; + default: + EXPECT_TRUE(false) << "Should 
only have 4 batches"; + } + }; + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + AdaptiveSharedBatchScheduler::QueueOptions queue_options; + std::unique_ptr> queue1; + std::unique_ptr> queue2; + queue_options.max_batch_size = 1000; + TF_ASSERT_OK(scheduler->AddQueue(queue_options, queue_callback, &queue1)); + queue_options.max_batch_size = 100; + TF_ASSERT_OK(scheduler->AddQueue(queue_options, queue_callback, &queue2)); + + // First batch immediately processed. + TF_ASSERT_OK(ScheduleTask(100, queue1.get())); + + TF_ASSERT_OK(ScheduleTask(100, queue1.get())); + env.AdvanceByMicroseconds(10); + TF_ASSERT_OK(ScheduleTask(100, queue1.get())); + env.AdvanceByMicroseconds(10); + + TF_ASSERT_OK(ScheduleTask(50, queue2.get())); + env.AdvanceByMicroseconds(45); + + TF_ASSERT_OK(ScheduleTask(900, queue1.get())); + + // Second batch - creation time: 0, fullness: 0.2, sched score: -20 + // Third batch - creation time: 20, fullness: 0.5, sched score: -30 + // Fourth batch - creation time: 65, fullness: 0.9, sched score: -25 + + finish_processing.Notify(); + start_teardown.Notify(); + } + stop_teardown.Notify(); +} + TEST(AdaptiveSharedBatchSchedulerTest, DeleteQueue) { AdaptiveSharedBatchScheduler::Options options; options.initial_in_flight_batches_limit = 1; -- GitLab From 1ded0ecca819e8569f120a3eb35cc477636f3340 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 2 Mar 2018 11:35:53 -0800 Subject: [PATCH 265/311] GCS: Update throttle state even if disabled. 
PiperOrigin-RevId: 187647263 --- tensorflow/core/platform/cloud/gcs_throttle.cc | 4 +--- tensorflow/core/platform/cloud/gcs_throttle.h | 13 +++++++++++-- .../core/platform/cloud/gcs_throttle_test.cc | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_throttle.cc b/tensorflow/core/platform/cloud/gcs_throttle.cc index eb5f8958a3..27dd06a625 100644 --- a/tensorflow/core/platform/cloud/gcs_throttle.cc +++ b/tensorflow/core/platform/cloud/gcs_throttle.cc @@ -26,10 +26,9 @@ GcsThrottle::GcsThrottle(EnvTime* env_time) bool GcsThrottle::AdmitRequest() { mutex_lock l(mu_); - if (!config_.enabled) return true; UpdateState(); if (available_tokens_ < config_.tokens_per_request) { - return false; + return false || !config_.enabled; } available_tokens_ -= config_.tokens_per_request; return true; @@ -37,7 +36,6 @@ bool GcsThrottle::AdmitRequest() { void GcsThrottle::RecordResponse(size_t num_bytes) { mutex_lock l(mu_); - if (!config_.enabled) return; UpdateState(); available_tokens_ -= request_bytes_to_tokens(num_bytes); } diff --git a/tensorflow/core/platform/cloud/gcs_throttle.h b/tensorflow/core/platform/cloud/gcs_throttle.h index 1a89daef08..6d5eed7338 100644 --- a/tensorflow/core/platform/cloud/gcs_throttle.h +++ b/tensorflow/core/platform/cloud/gcs_throttle.h @@ -109,13 +109,22 @@ class GcsThrottle { * purpose of this function is to make available to monitoring or other * instrumentation the number of available tokens in the pool. */ - inline int64 available_tokens() { + inline int64 available_tokens() LOCKS_EXCLUDED(mu_) { mutex_lock l(mu_); - if (!config_.enabled) return 0; UpdateState(); return available_tokens_; } + /** + * is_enabled determines if the throttle is enabled. + * + * If !is_enabled(), AdmitRequest() will always return true. 
+ */ + bool is_enabled() LOCKS_EXCLUDED(mu_) { + mutex_lock l(mu_); + return config_.enabled; + } + private: /** * UpdateState updates the available_tokens_ and last_updated_secs_ variables. diff --git a/tensorflow/core/platform/cloud/gcs_throttle_test.cc b/tensorflow/core/platform/cloud/gcs_throttle_test.cc index 694756022e..57193ac405 100644 --- a/tensorflow/core/platform/cloud/gcs_throttle_test.cc +++ b/tensorflow/core/platform/cloud/gcs_throttle_test.cc @@ -96,6 +96,24 @@ TEST_F(GcsThrottleTest, ReverseTime) { EXPECT_EQ(200000, throttle_.available_tokens()); } +TEST(GcsThrottleDisabledTest, Disabled) { + TestTime time; + GcsThrottle throttle(&time); + ASSERT_FALSE(throttle.is_enabled()); // Verify throttle is disabled. + + EXPECT_EQ(0, throttle.available_tokens()); + time.AdvanceSeconds(1); + EXPECT_EQ(100000, throttle.available_tokens()); + EXPECT_TRUE(throttle.AdmitRequest()); + EXPECT_EQ(99900, throttle.available_tokens()); + time.AdvanceSeconds(1); + EXPECT_EQ(199900, throttle.available_tokens()); + throttle.RecordResponse(128000000); // 128 MB response. + EXPECT_LT(0, throttle.available_tokens()); + // Admit request even without available tokens + EXPECT_TRUE(throttle.AdmitRequest()); +} + } // namespace } // namespace tensorflow -- GitLab From 2abc47106624e0102c917535dd6df45561550ade Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Fri, 2 Mar 2018 11:59:02 -0800 Subject: [PATCH 266/311] Move the PS_OPS from Estimator to device_setter to benefit more users. 
PiperOrigin-RevId: 187650283 --- tensorflow/python/estimator/estimator.py | 10 ++-------- tensorflow/python/training/device_setter.py | 9 +++++++++ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 1a2b33721a..60351471f1 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -49,6 +49,7 @@ from tensorflow.python.saved_model import builder as saved_model_builder from tensorflow.python.saved_model import tag_constants from tensorflow.python.summary import summary from tensorflow.python.summary.writer import writer_cache +from tensorflow.python.training import device_setter from tensorflow.python.training import evaluation from tensorflow.python.training import monitored_session from tensorflow.python.training import saver @@ -1007,13 +1008,6 @@ def _get_replica_device_setter(config): Returns: A replica device setter, or None. """ - ps_ops = [ - 'Variable', 'VariableV2', 'AutoReloadVariable', 'MutableHashTable', - 'MutableHashTableV2', 'MutableHashTableOfTensors', - 'MutableHashTableOfTensorsV2', 'MutableDenseHashTable', - 'MutableDenseHashTableV2', 'VarHandleOp' - ] - if config.task_type: worker_device = '/job:%s/task:%d' % (config.task_type, config.task_id) else: @@ -1024,7 +1018,7 @@ def _get_replica_device_setter(config): ps_tasks=config.num_ps_replicas, worker_device=worker_device, merge_devices=True, - ps_ops=ps_ops, + ps_ops=list(device_setter.STANDARD_PS_OPS), cluster=config.cluster_spec) else: return None diff --git a/tensorflow/python/training/device_setter.py b/tensorflow/python/training/device_setter.py index 689088bb41..0e824d89e9 100644 --- a/tensorflow/python/training/device_setter.py +++ b/tensorflow/python/training/device_setter.py @@ -25,6 +25,15 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib from tensorflow.python.util.tf_export 
import tf_export +# This is a tuple of PS ops used by tf.estimator.Estimator which should work in +# almost all cases. +STANDARD_PS_OPS = ( + "Variable", "VariableV2", "AutoReloadVariable", "MutableHashTable", + "MutableHashTableV2", "MutableHashTableOfTensors", + "MutableHashTableOfTensorsV2", "MutableDenseHashTable", + "MutableDenseHashTableV2", "VarHandleOp" +) + class _RoundRobinStrategy(object): """Returns the next ps task index for placement in round-robin order. -- GitLab From 41aa3e75ca35c763c23aeedf2409589b7814c7f1 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 2 Mar 2018 12:19:23 -0800 Subject: [PATCH 267/311] GCS: Extract block cache interface from implementation. PiperOrigin-RevId: 187652953 --- tensorflow/core/platform/cloud/BUILD | 20 +- .../core/platform/cloud/file_block_cache.h | 161 +----------- .../core/platform/cloud/gcs_file_system.cc | 15 +- ...block_cache.cc => ram_file_block_cache.cc} | 35 +-- .../platform/cloud/ram_file_block_cache.h | 229 ++++++++++++++++++ ...e_test.cc => ram_file_block_cache_test.cc} | 60 ++--- 6 files changed, 311 insertions(+), 209 deletions(-) rename tensorflow/core/platform/cloud/{file_block_cache.cc => ram_file_block_cache.cc} (89%) create mode 100644 tensorflow/core/platform/cloud/ram_file_block_cache.h rename tensorflow/core/platform/cloud/{file_block_cache_test.cc => ram_file_block_cache_test.cc} (92%) diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD index 9ba25dea4f..0a17a419d3 100644 --- a/tensorflow/core/platform/cloud/BUILD +++ b/tensorflow/core/platform/cloud/BUILD @@ -38,13 +38,24 @@ cc_library( cc_library( name = "file_block_cache", - srcs = ["file_block_cache.cc"], hdrs = ["file_block_cache.h"], copts = tf_copts(), visibility = ["//tensorflow:__subpackages__"], deps = ["//tensorflow/core:lib"], ) +cc_library( + name = "ram_file_block_cache", + srcs = ["ram_file_block_cache.cc"], + hdrs = ["ram_file_block_cache.h"], + copts = tf_copts(), + visibility = 
["//tensorflow:__subpackages__"], + deps = [ + ":file_block_cache", + "//tensorflow/core:lib", + ], +) + cc_library( name = "gcs_dns_cache", srcs = ["gcs_dns_cache.cc"], @@ -83,6 +94,7 @@ cc_library( ":gcs_throttle", ":google_auth_provider", ":http_request", + ":ram_file_block_cache", ":retrying_file_system", ":retrying_utils", ":time_util", @@ -245,12 +257,12 @@ tf_cc_test( ) tf_cc_test( - name = "file_block_cache_test", + name = "ram_file_block_cache_test", size = "small", - srcs = ["file_block_cache_test.cc"], + srcs = ["ram_file_block_cache_test.cc"], deps = [ - ":file_block_cache", ":now_seconds_env", + ":ram_file_block_cache", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:test", diff --git a/tensorflow/core/platform/cloud/file_block_cache.h b/tensorflow/core/platform/cloud/file_block_cache.h index 5c180e2332..da16788247 100644 --- a/tensorflow/core/platform/cloud/file_block_cache.h +++ b/tensorflow/core/platform/cloud/file_block_cache.h @@ -32,7 +32,7 @@ limitations under the License. namespace tensorflow { -/// \brief An LRU block cache of file contents, keyed by {filename, offset}. +/// \brief A block cache of file contents, keyed by {filename, offset}. /// /// This class should be shared by read-only random access files on a remote /// filesystem (e.g. GCS). @@ -48,27 +48,7 @@ class FileBlockCache { size_t* bytes_transferred)> BlockFetcher; - FileBlockCache(size_t block_size, size_t max_bytes, uint64 max_staleness, - BlockFetcher block_fetcher, Env* env = Env::Default()) - : block_size_(block_size), - max_bytes_(max_bytes), - max_staleness_(max_staleness), - block_fetcher_(block_fetcher), - env_(env) { - if (max_staleness_ > 0) { - pruning_thread_.reset(env_->StartThread(ThreadOptions(), "TF_prune_FBC", - [this] { Prune(); })); - } - } - - ~FileBlockCache() { - if (pruning_thread_) { - stop_pruning_thread_.Notify(); - // Destroying pruning_thread_ will block until Prune() receives the above - // notification and returns. 
- pruning_thread_.reset(); - } - } + virtual ~FileBlockCache() {} /// Read `n` bytes from `filename` starting at `offset` into `out`. This /// method will return: @@ -84,143 +64,22 @@ class FileBlockCache { /// placed in `out`. /// 4) OK otherwise (i.e. the read succeeded, and at least one byte was placed /// in `out`). - Status Read(const string& filename, size_t offset, size_t n, char* buffer, - size_t* bytes_transferred); + virtual Status Read(const string& filename, size_t offset, size_t n, + char* buffer, size_t* bytes_transferred) = 0; /// Remove all cached blocks for `filename`. - void RemoveFile(const string& filename) LOCKS_EXCLUDED(mu_); + virtual void RemoveFile(const string& filename) = 0; /// Remove all cached data. - void Flush() LOCKS_EXCLUDED(mu_); + virtual void Flush() = 0; /// Accessors for cache parameters. - size_t block_size() const { return block_size_; } - size_t max_bytes() const { return max_bytes_; } - uint64 max_staleness() const { return max_staleness_; } + virtual size_t block_size() const = 0; + virtual size_t max_bytes() const = 0; + virtual uint64 max_staleness() const = 0; /// The current size (in bytes) of the cache. - size_t CacheSize() const LOCKS_EXCLUDED(mu_); - - private: - /// The size of the blocks stored in the LRU cache, as well as the size of the - /// reads from the underlying filesystem. - const size_t block_size_; - /// The maximum number of bytes (sum of block sizes) allowed in the LRU cache. - const size_t max_bytes_; - /// The maximum staleness of any block in the LRU cache, in seconds. - const uint64 max_staleness_; - /// The callback to read a block from the underlying filesystem. - const BlockFetcher block_fetcher_; - /// The Env from which we read timestamps. - Env* const env_; // not owned - - /// \brief The key type for the file block cache. - /// - /// The file block cache key is a {filename, offset} pair. - typedef std::pair Key; - - /// \brief The state of a block. 
- /// - /// A block begins in the CREATED stage. The first thread will attempt to read - /// the block from the filesystem, transitioning the state of the block to - /// FETCHING. After completing, if the read was successful the state should - /// be FINISHED. Otherwise the state should be ERROR. A subsequent read can - /// re-fetch the block if the state is ERROR. - enum class FetchState { - CREATED, - FETCHING, - FINISHED, - ERROR, - }; - - /// \brief A block of a file. - /// - /// A file block consists of the block data, the block's current position in - /// the LRU cache, the timestamp (seconds since epoch) at which the block - /// was cached, a coordination lock, and state & condition variables. - /// - /// Thread safety: - /// The iterator and timestamp fields should only be accessed while holding - /// the block-cache-wide mu_ instance variable. The state variable should only - /// be accessed while holding the Block's mu lock. The data vector should only - /// be accessed after state == FINISHED, and it should never be modified. - /// - /// In order to prevent deadlocks, never grab the block-cache-wide mu_ lock - /// AFTER grabbing any block's mu lock. It is safe to grab mu without locking - /// mu_. - struct Block { - /// The block data. - std::vector data; - /// A list iterator pointing to the block's position in the LRU list. - std::list::iterator lru_iterator; - /// A list iterator pointing to the block's position in the LRA list. - std::list::iterator lra_iterator; - /// The timestamp (seconds since epoch) at which the block was cached. - uint64 timestamp; - /// Mutex to guard state variable - mutex mu; - /// The state of the block. - FetchState state GUARDED_BY(mu) = FetchState::CREATED; - /// Wait on cond_var if state is FETCHING. - condition_variable cond_var; - }; - - /// \brief The block map type for the file block cache. - /// - /// The block map is an ordered map from Key to Block. 
- typedef std::map> BlockMap; - - /// Prune the cache by removing files with expired blocks. - void Prune() LOCKS_EXCLUDED(mu_); - - bool BlockNotStale(const std::shared_ptr& block) - EXCLUSIVE_LOCKS_REQUIRED(mu_); - - /// Look up a Key in the block cache. - std::shared_ptr Lookup(const Key& key) LOCKS_EXCLUDED(mu_); - - Status MaybeFetch(const Key& key, const std::shared_ptr& block) - LOCKS_EXCLUDED(mu_); - - /// Trim the block cache to make room for another entry. - void Trim() EXCLUSIVE_LOCKS_REQUIRED(mu_); - - /// Update the LRU iterator for the block at `key`. - Status UpdateLRU(const Key& key, const std::shared_ptr& block) - LOCKS_EXCLUDED(mu_); - - /// Remove all blocks of a file, with mu_ already held. - void RemoveFile_Locked(const string& filename) EXCLUSIVE_LOCKS_REQUIRED(mu_); - - /// Remove the block `entry` from the block map and LRU list, and update the - /// cache size accordingly. - void RemoveBlock(BlockMap::iterator entry) EXCLUSIVE_LOCKS_REQUIRED(mu_); - - /// The cache pruning thread that removes files with expired blocks. - std::unique_ptr pruning_thread_; - - /// Notification for stopping the cache pruning thread. - Notification stop_pruning_thread_; - - /// Guards access to the block map, LRU list, and cached byte count. - mutable mutex mu_; - - /// The block map (map from Key to Block). - BlockMap block_map_ GUARDED_BY(mu_); - - /// The LRU list of block keys. The front of the list identifies the most - /// recently accessed block. - std::list lru_list_ GUARDED_BY(mu_); - - /// The LRA (least recently added) list of block keys. The front of the list - /// identifies the most recently added block. - /// - /// Note: blocks are added to lra_list_ only after they have successfully been - /// fetched from the underlying block store. - std::list lra_list_ GUARDED_BY(mu_); - - /// The combined number of bytes in all of the cached blocks. 
- size_t cache_size_ GUARDED_BY(mu_) = 0; + virtual size_t CacheSize() const = 0; }; } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 01ca0d76ba..84b65cec4f 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/platform/cloud/curl_http_request.h" #include "tensorflow/core/platform/cloud/file_block_cache.h" #include "tensorflow/core/platform/cloud/google_auth_provider.h" +#include "tensorflow/core/platform/cloud/ram_file_block_cache.h" #include "tensorflow/core/platform/cloud/retrying_utils.h" #include "tensorflow/core/platform/cloud/time_util.h" #include "tensorflow/core/platform/env.h" @@ -783,13 +784,13 @@ Status GcsFileSystem::NewRandomAccessFile( // A helper function to build a FileBlockCache for GcsFileSystem. std::unique_ptr GcsFileSystem::MakeFileBlockCache( size_t block_size, size_t max_bytes, uint64 max_staleness) { - std::unique_ptr file_block_cache( - new FileBlockCache(block_size, max_bytes, max_staleness, - [this](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred) { - return LoadBufferFromGCS(filename, offset, n, buffer, - bytes_transferred); - })); + std::unique_ptr file_block_cache(new RamFileBlockCache( + block_size, max_bytes, max_staleness, + [this](const string& filename, size_t offset, size_t n, char* buffer, + size_t* bytes_transferred) { + return LoadBufferFromGCS(filename, offset, n, buffer, + bytes_transferred); + })); return file_block_cache; } diff --git a/tensorflow/core/platform/cloud/file_block_cache.cc b/tensorflow/core/platform/cloud/ram_file_block_cache.cc similarity index 89% rename from tensorflow/core/platform/cloud/file_block_cache.cc rename to tensorflow/core/platform/cloud/ram_file_block_cache.cc index 6add1142a1..55a5657a50 100644 --- 
a/tensorflow/core/platform/cloud/file_block_cache.cc +++ b/tensorflow/core/platform/cloud/ram_file_block_cache.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/platform/cloud/file_block_cache.h" +#include "tensorflow/core/platform/cloud/ram_file_block_cache.h" #include #include #include "tensorflow/core/lib/gtl/cleanup.h" @@ -21,7 +21,7 @@ limitations under the License. namespace tensorflow { -bool FileBlockCache::BlockNotStale(const std::shared_ptr& block) { +bool RamFileBlockCache::BlockNotStale(const std::shared_ptr& block) { mutex_lock l(block->mu); if (block->state != FetchState::FINISHED) { return true; // No need to check for staleness. @@ -30,7 +30,8 @@ bool FileBlockCache::BlockNotStale(const std::shared_ptr& block) { return env_->NowSeconds() - block->timestamp <= max_staleness_; } -std::shared_ptr FileBlockCache::Lookup(const Key& key) { +std::shared_ptr RamFileBlockCache::Lookup( + const Key& key) { mutex_lock lock(mu_); auto entry = block_map_.find(key); if (entry != block_map_.end()) { @@ -55,15 +56,15 @@ std::shared_ptr FileBlockCache::Lookup(const Key& key) { } // Remove blocks from the cache until we do not exceed our maximum size. -void FileBlockCache::Trim() { +void RamFileBlockCache::Trim() { while (!lru_list_.empty() && cache_size_ > max_bytes_) { RemoveBlock(block_map_.find(lru_list_.back())); } } /// Move the block to the front of the LRU list if it isn't already there. -Status FileBlockCache::UpdateLRU(const Key& key, - const std::shared_ptr& block) { +Status RamFileBlockCache::UpdateLRU(const Key& key, + const std::shared_ptr& block) { mutex_lock lock(mu_); if (block->timestamp == 0) { // The block was evicted from another thread. Allow it to remain evicted. 
@@ -92,8 +93,8 @@ Status FileBlockCache::UpdateLRU(const Key& key, return Status::OK(); } -Status FileBlockCache::MaybeFetch(const Key& key, - const std::shared_ptr& block) { +Status RamFileBlockCache::MaybeFetch(const Key& key, + const std::shared_ptr& block) { bool downloaded_block = false; auto reconcile_state = gtl::MakeCleanup([this, &downloaded_block, &key, &block] { @@ -151,11 +152,11 @@ Status FileBlockCache::MaybeFetch(const Key& key, } } return errors::Internal( - "Control flow should never reach the end of FileBlockCache::Fetch."); + "Control flow should never reach the end of RamFileBlockCache::Fetch."); } -Status FileBlockCache::Read(const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred) { +Status RamFileBlockCache::Read(const string& filename, size_t offset, size_t n, + char* buffer, size_t* bytes_transferred) { *bytes_transferred = 0; if (n == 0) { return Status::OK(); @@ -216,12 +217,12 @@ Status FileBlockCache::Read(const string& filename, size_t offset, size_t n, return Status::OK(); } -size_t FileBlockCache::CacheSize() const { +size_t RamFileBlockCache::CacheSize() const { mutex_lock lock(mu_); return cache_size_; } -void FileBlockCache::Prune() { +void RamFileBlockCache::Prune() { while (!WaitForNotificationWithTimeout(&stop_pruning_thread_, 1000000)) { mutex_lock lock(mu_); uint64 now = env_->NowSeconds(); @@ -238,7 +239,7 @@ void FileBlockCache::Prune() { } } -void FileBlockCache::Flush() { +void RamFileBlockCache::Flush() { mutex_lock lock(mu_); block_map_.clear(); lru_list_.clear(); @@ -246,12 +247,12 @@ void FileBlockCache::Flush() { cache_size_ = 0; } -void FileBlockCache::RemoveFile(const string& filename) { +void RamFileBlockCache::RemoveFile(const string& filename) { mutex_lock lock(mu_); RemoveFile_Locked(filename); } -void FileBlockCache::RemoveFile_Locked(const string& filename) { +void RamFileBlockCache::RemoveFile_Locked(const string& filename) { Key begin = std::make_pair(filename, 0); auto it 
= block_map_.lower_bound(begin); while (it != block_map_.end() && it->first.first == filename) { @@ -261,7 +262,7 @@ void FileBlockCache::RemoveFile_Locked(const string& filename) { } } -void FileBlockCache::RemoveBlock(BlockMap::iterator entry) { +void RamFileBlockCache::RemoveBlock(BlockMap::iterator entry) { // This signals that the block is removed, and should not be inadvertently // reinserted into the cache in UpdateLRU. entry->second->timestamp = 0; diff --git a/tensorflow/core/platform/cloud/ram_file_block_cache.h b/tensorflow/core/platform/cloud/ram_file_block_cache.h new file mode 100644 index 0000000000..7fdd7b2e02 --- /dev/null +++ b/tensorflow/core/platform/cloud/ram_file_block_cache.h @@ -0,0 +1,229 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_CLOUD_RAM_FILE_BLOCK_CACHE_H_ +#define TENSORFLOW_CORE_PLATFORM_CLOUD_RAM_FILE_BLOCK_CACHE_H_ + +#include +#include +#include +#include +#include +#include +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/platform/cloud/file_block_cache.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/notification.h" +#include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +/// \brief An LRU block cache of file contents, keyed by {filename, offset}. +/// +/// This class should be shared by read-only random access files on a remote +/// filesystem (e.g. GCS). +class RamFileBlockCache : public FileBlockCache { + public: + /// The callback executed when a block is not found in the cache, and needs to + /// be fetched from the backing filesystem. This callback is provided when the + /// cache is constructed. The returned Status should be OK as long as the + /// read from the remote filesystem succeeded (similar to the semantics of the + /// read(2) system call). + typedef std::function + BlockFetcher; + + RamFileBlockCache(size_t block_size, size_t max_bytes, uint64 max_staleness, + BlockFetcher block_fetcher, Env* env = Env::Default()) + : block_size_(block_size), + max_bytes_(max_bytes), + max_staleness_(max_staleness), + block_fetcher_(block_fetcher), + env_(env) { + if (max_staleness_ > 0) { + pruning_thread_.reset(env_->StartThread(ThreadOptions(), "TF_prune_FBC", + [this] { Prune(); })); + } + } + + ~RamFileBlockCache() override { + if (pruning_thread_) { + stop_pruning_thread_.Notify(); + // Destroying pruning_thread_ will block until Prune() receives the above + // notification and returns. 
+ pruning_thread_.reset(); + } + } + + /// Read `n` bytes from `filename` starting at `offset` into `out`. This + /// method will return: + /// + /// 1) The error from the remote filesystem, if the read from the remote + /// filesystem failed. + /// 2) PRECONDITION_FAILED if the read from the remote filesystem succeeded, + /// but the read returned a partial block, and the LRU cache contained a + /// block at a higher offset (indicating that the partial block should have + /// been a full block). + /// 3) OUT_OF_RANGE if the read from the remote filesystem succeeded, but + /// the file contents do not extend past `offset` and thus nothing was + /// placed in `out`. + /// 4) OK otherwise (i.e. the read succeeded, and at least one byte was placed + /// in `out`). + Status Read(const string& filename, size_t offset, size_t n, char* buffer, + size_t* bytes_transferred) override; + + /// Remove all cached blocks for `filename`. + void RemoveFile(const string& filename) override LOCKS_EXCLUDED(mu_); + + /// Remove all cached data. + void Flush() LOCKS_EXCLUDED(mu_) override; + + /// Accessors for cache parameters. + size_t block_size() const override { return block_size_; } + size_t max_bytes() const override { return max_bytes_; } + uint64 max_staleness() const override { return max_staleness_; } + + /// The current size (in bytes) of the cache. + size_t CacheSize() const override LOCKS_EXCLUDED(mu_); + + private: + /// The size of the blocks stored in the LRU cache, as well as the size of the + /// reads from the underlying filesystem. + const size_t block_size_; + /// The maximum number of bytes (sum of block sizes) allowed in the LRU cache. + const size_t max_bytes_; + /// The maximum staleness of any block in the LRU cache, in seconds. + const uint64 max_staleness_; + /// The callback to read a block from the underlying filesystem. + const BlockFetcher block_fetcher_; + /// The Env from which we read timestamps. 
+ Env* const env_; // not owned + + /// \brief The key type for the file block cache. + /// + /// The file block cache key is a {filename, offset} pair. + typedef std::pair Key; + + /// \brief The state of a block. + /// + /// A block begins in the CREATED stage. The first thread will attempt to read + /// the block from the filesystem, transitioning the state of the block to + /// FETCHING. After completing, if the read was successful the state should + /// be FINISHED. Otherwise the state should be ERROR. A subsequent read can + /// re-fetch the block if the state is ERROR. + enum class FetchState { + CREATED, + FETCHING, + FINISHED, + ERROR, + }; + + /// \brief A block of a file. + /// + /// A file block consists of the block data, the block's current position in + /// the LRU cache, the timestamp (seconds since epoch) at which the block + /// was cached, a coordination lock, and state & condition variables. + /// + /// Thread safety: + /// The iterator and timestamp fields should only be accessed while holding + /// the block-cache-wide mu_ instance variable. The state variable should only + /// be accessed while holding the Block's mu lock. The data vector should only + /// be accessed after state == FINISHED, and it should never be modified. + /// + /// In order to prevent deadlocks, never grab the block-cache-wide mu_ lock + /// AFTER grabbing any block's mu lock. It is safe to grab mu without locking + /// mu_. + struct Block { + /// The block data. + std::vector data; + /// A list iterator pointing to the block's position in the LRU list. + std::list::iterator lru_iterator; + /// A list iterator pointing to the block's position in the LRA list. + std::list::iterator lra_iterator; + /// The timestamp (seconds since epoch) at which the block was cached. + uint64 timestamp; + /// Mutex to guard state variable + mutex mu; + /// The state of the block. + FetchState state GUARDED_BY(mu) = FetchState::CREATED; + /// Wait on cond_var if state is FETCHING. 
+ condition_variable cond_var; + }; + + /// \brief The block map type for the file block cache. + /// + /// The block map is an ordered map from Key to Block. + typedef std::map> BlockMap; + + /// Prune the cache by removing files with expired blocks. + void Prune() LOCKS_EXCLUDED(mu_); + + bool BlockNotStale(const std::shared_ptr& block) + EXCLUSIVE_LOCKS_REQUIRED(mu_); + + /// Look up a Key in the block cache. + std::shared_ptr Lookup(const Key& key) LOCKS_EXCLUDED(mu_); + + Status MaybeFetch(const Key& key, const std::shared_ptr& block) + LOCKS_EXCLUDED(mu_); + + /// Trim the block cache to make room for another entry. + void Trim() EXCLUSIVE_LOCKS_REQUIRED(mu_); + + /// Update the LRU iterator for the block at `key`. + Status UpdateLRU(const Key& key, const std::shared_ptr& block) + LOCKS_EXCLUDED(mu_); + + /// Remove all blocks of a file, with mu_ already held. + void RemoveFile_Locked(const string& filename) EXCLUSIVE_LOCKS_REQUIRED(mu_); + + /// Remove the block `entry` from the block map and LRU list, and update the + /// cache size accordingly. + void RemoveBlock(BlockMap::iterator entry) EXCLUSIVE_LOCKS_REQUIRED(mu_); + + /// The cache pruning thread that removes files with expired blocks. + std::unique_ptr pruning_thread_; + + /// Notification for stopping the cache pruning thread. + Notification stop_pruning_thread_; + + /// Guards access to the block map, LRU list, and cached byte count. + mutable mutex mu_; + + /// The block map (map from Key to Block). + BlockMap block_map_ GUARDED_BY(mu_); + + /// The LRU list of block keys. The front of the list identifies the most + /// recently accessed block. + std::list lru_list_ GUARDED_BY(mu_); + + /// The LRA (least recently added) list of block keys. The front of the list + /// identifies the most recently added block. + /// + /// Note: blocks are added to lra_list_ only after they have successfully been + /// fetched from the underlying block store. 
+ std::list lra_list_ GUARDED_BY(mu_); + + /// The combined number of bytes in all of the cached blocks. + size_t cache_size_ GUARDED_BY(mu_) = 0; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PLATFORM_CLOUD_RAM_FILE_BLOCK_CACHE_H_ diff --git a/tensorflow/core/platform/cloud/file_block_cache_test.cc b/tensorflow/core/platform/cloud/ram_file_block_cache_test.cc similarity index 92% rename from tensorflow/core/platform/cloud/file_block_cache_test.cc rename to tensorflow/core/platform/cloud/ram_file_block_cache_test.cc index 596fdbf19e..d555b682a6 100644 --- a/tensorflow/core/platform/cloud/file_block_cache_test.cc +++ b/tensorflow/core/platform/cloud/ram_file_block_cache_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/platform/cloud/file_block_cache.h" +#include "tensorflow/core/platform/cloud/ram_file_block_cache.h" #include #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -25,8 +25,8 @@ limitations under the License. namespace tensorflow { namespace { -Status ReadCache(FileBlockCache* cache, const string& filename, size_t offset, - size_t n, std::vector* out) { +Status ReadCache(RamFileBlockCache* cache, const string& filename, + size_t offset, size_t n, std::vector* out) { out->clear(); out->resize(n, 0); size_t bytes_transferred = 0; @@ -37,7 +37,7 @@ Status ReadCache(FileBlockCache* cache, const string& filename, size_t offset, return status; } -TEST(FileBlockCacheTest, PassThrough) { +TEST(RamFileBlockCacheTest, PassThrough) { const string want_filename = "foo/bar"; const size_t want_offset = 42; const size_t want_n = 1024; @@ -54,9 +54,9 @@ TEST(FileBlockCacheTest, PassThrough) { return Status::OK(); }; // If block_size, max_bytes, or both are zero, the cache is a pass-through. 
- FileBlockCache cache1(1, 0, 0, fetcher); - FileBlockCache cache2(0, 1, 0, fetcher); - FileBlockCache cache3(0, 0, 0, fetcher); + RamFileBlockCache cache1(1, 0, 0, fetcher); + RamFileBlockCache cache2(0, 1, 0, fetcher); + RamFileBlockCache cache3(0, 0, 0, fetcher); std::vector out; TF_EXPECT_OK(ReadCache(&cache1, want_filename, want_offset, want_n, &out)); EXPECT_EQ(calls, 1); @@ -66,7 +66,7 @@ TEST(FileBlockCacheTest, PassThrough) { EXPECT_EQ(calls, 3); } -TEST(FileBlockCacheTest, BlockAlignment) { +TEST(RamFileBlockCacheTest, BlockAlignment) { // Initialize a 256-byte buffer. This is the file underlying the reads we'll // do in this test. const size_t size = 256; @@ -89,7 +89,7 @@ TEST(FileBlockCacheTest, BlockAlignment) { for (size_t block_size = 2; block_size <= 4; block_size++) { // Make a cache of N-byte block size (1 block) and verify that reads of // varying offsets and lengths return correct data. - FileBlockCache cache(block_size, block_size, 0, fetcher); + RamFileBlockCache cache(block_size, block_size, 0, fetcher); for (size_t offset = 0; offset < 10; offset++) { for (size_t n = block_size - 2; n <= block_size + 2; n++) { std::vector got; @@ -117,7 +117,7 @@ TEST(FileBlockCacheTest, BlockAlignment) { } } -TEST(FileBlockCacheTest, CacheHits) { +TEST(RamFileBlockCacheTest, CacheHits) { const size_t block_size = 16; std::set calls; auto fetcher = [&calls, block_size](const string& filename, size_t offset, @@ -132,7 +132,7 @@ TEST(FileBlockCacheTest, CacheHits) { return Status::OK(); }; const uint32 block_count = 256; - FileBlockCache cache(block_size, block_count * block_size, 0, fetcher); + RamFileBlockCache cache(block_size, block_count * block_size, 0, fetcher); std::vector out; out.resize(block_count, 0); // The cache has space for `block_count` blocks. 
The loop with i = 0 should @@ -146,7 +146,7 @@ TEST(FileBlockCacheTest, CacheHits) { } } -TEST(FileBlockCacheTest, OutOfRange) { +TEST(RamFileBlockCacheTest, OutOfRange) { // Tests reads of a 24-byte file with block size 16. const size_t block_size = 16; const size_t file_size = 24; @@ -172,7 +172,7 @@ TEST(FileBlockCacheTest, OutOfRange) { *bytes_transferred = bytes_to_copy; return Status::OK(); }; - FileBlockCache cache(block_size, block_size, 0, fetcher); + RamFileBlockCache cache(block_size, block_size, 0, fetcher); std::vector out; // Reading the first 16 bytes should be fine. TF_EXPECT_OK(ReadCache(&cache, "", 0, block_size, &out)); @@ -191,7 +191,7 @@ TEST(FileBlockCacheTest, OutOfRange) { EXPECT_EQ(out.size(), file_size - block_size); } -TEST(FileBlockCacheTest, Inconsistent) { +TEST(RamFileBlockCacheTest, Inconsistent) { // Tests the detection of interrupted reads leading to partially filled blocks // where we expected complete blocks. const size_t block_size = 16; @@ -205,7 +205,7 @@ TEST(FileBlockCacheTest, Inconsistent) { *bytes_transferred = 1; return Status::OK(); }; - FileBlockCache cache(block_size, 2 * block_size, 0, fetcher); + RamFileBlockCache cache(block_size, 2 * block_size, 0, fetcher); std::vector out; // Read the second block; this should yield an OK status and a single byte. 
TF_EXPECT_OK(ReadCache(&cache, "", block_size, block_size, &out)); @@ -216,7 +216,7 @@ TEST(FileBlockCacheTest, Inconsistent) { EXPECT_EQ(status.code(), error::INTERNAL); } -TEST(FileBlockCacheTest, LRU) { +TEST(RamFileBlockCacheTest, LRU) { const size_t block_size = 16; std::list calls; auto fetcher = [&calls, block_size](const string& filename, size_t offset, @@ -233,7 +233,7 @@ TEST(FileBlockCacheTest, LRU) { return Status::OK(); }; const uint32 block_count = 2; - FileBlockCache cache(block_size, block_count * block_size, 0, fetcher); + RamFileBlockCache cache(block_size, block_count * block_size, 0, fetcher); std::vector out; // Read blocks from the cache, and verify the LRU behavior based on the // fetcher calls that the cache makes. @@ -265,7 +265,7 @@ TEST(FileBlockCacheTest, LRU) { TF_EXPECT_OK(ReadCache(&cache, "", 0, 1, &out)); } -TEST(FileBlockCacheTest, MaxStaleness) { +TEST(RamFileBlockCacheTest, MaxStaleness) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, char* buffer, size_t* bytes_transferred) { @@ -278,7 +278,7 @@ TEST(FileBlockCacheTest, MaxStaleness) { std::unique_ptr env(new NowSecondsEnv); // Create a cache with max staleness of 2 seconds, and verify that it works as // expected. - FileBlockCache cache1(8, 16, 2 /* max staleness */, fetcher, env.get()); + RamFileBlockCache cache1(8, 16, 2 /* max staleness */, fetcher, env.get()); // Execute the first read to load the block. TF_EXPECT_OK(ReadCache(&cache1, "", 0, 1, &out)); EXPECT_EQ(calls, 1); @@ -294,7 +294,7 @@ TEST(FileBlockCacheTest, MaxStaleness) { // as expected. calls = 0; env->SetNowSeconds(0); - FileBlockCache cache2(8, 16, 0 /* max staleness */, fetcher, env.get()); + RamFileBlockCache cache2(8, 16, 0 /* max staleness */, fetcher, env.get()); // Execute the first read to load the block. 
TF_EXPECT_OK(ReadCache(&cache2, "", 0, 1, &out)); EXPECT_EQ(calls, 1); @@ -305,7 +305,7 @@ TEST(FileBlockCacheTest, MaxStaleness) { EXPECT_EQ(calls, 1); } -TEST(FileBlockCacheTest, RemoveFile) { +TEST(RamFileBlockCacheTest, RemoveFile) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, char* buffer, size_t* bytes_transferred) { @@ -321,7 +321,7 @@ TEST(FileBlockCacheTest, RemoveFile) { }; // This cache has space for 4 blocks; we'll read from two files. const size_t n = 3; - FileBlockCache cache(8, 32, 0, fetcher); + RamFileBlockCache cache(8, 32, 0, fetcher); std::vector out; std::vector a(n, 'a'); std::vector b(n, 'b'); @@ -367,7 +367,7 @@ TEST(FileBlockCacheTest, RemoveFile) { EXPECT_EQ(calls, 6); } -TEST(FileBlockCacheTest, Prune) { +TEST(RamFileBlockCacheTest, Prune) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, char* buffer, size_t* bytes_transferred) { @@ -381,7 +381,7 @@ TEST(FileBlockCacheTest, Prune) { std::unique_ptr env(new NowSecondsEnv); uint64 now = Env::Default()->NowSeconds(); env->SetNowSeconds(now); - FileBlockCache cache(8, 32, 1 /* max staleness */, fetcher, env.get()); + RamFileBlockCache cache(8, 32, 1 /* max staleness */, fetcher, env.get()); // Read three blocks into the cache, and advance the timestamp by one second // with each read. Start with a block of "a" at the current timestamp `now`. TF_EXPECT_OK(ReadCache(&cache, "a", 0, 1, &out)); @@ -426,7 +426,7 @@ TEST(FileBlockCacheTest, Prune) { EXPECT_EQ(cache.CacheSize(), 0); } -TEST(FileBlockCacheTest, ParallelReads) { +TEST(RamFileBlockCacheTest, ParallelReads) { // This fetcher won't respond until either `callers` threads are calling it // concurrently (at which point it will respond with success to all callers), // or 10 seconds have elapsed (at which point it will respond with an error). 
@@ -444,7 +444,7 @@ TEST(FileBlockCacheTest, ParallelReads) { return Status::OK(); }; const int block_size = 8; - FileBlockCache cache(block_size, 2 * callers * block_size, 0, fetcher); + RamFileBlockCache cache(block_size, 2 * callers * block_size, 0, fetcher); std::vector> threads; for (int i = 0; i < callers; i++) { threads.emplace_back( @@ -461,7 +461,7 @@ TEST(FileBlockCacheTest, ParallelReads) { // executed, or 10 seconds have passed). } -TEST(FileBlockCacheTest, CoalesceConcurrentReads) { +TEST(RamFileBlockCacheTest, CoalesceConcurrentReads) { // Concurrent reads to the same file blocks should be de-duplicated. const size_t block_size = 16; int num_requests = 0; @@ -479,7 +479,7 @@ TEST(FileBlockCacheTest, CoalesceConcurrentReads) { Env::Default()->SleepForMicroseconds(100000); // 0.1 secs return Status::OK(); }; - FileBlockCache cache(block_size, block_size, 0, fetcher); + RamFileBlockCache cache(block_size, block_size, 0, fetcher); // Fork off thread for parallel read. std::unique_ptr concurrent( Env::Default()->StartThread({}, "concurrent", [&cache, block_size] { @@ -496,7 +496,7 @@ TEST(FileBlockCacheTest, CoalesceConcurrentReads) { EXPECT_EQ(1, num_requests); } -TEST(FileBlockCacheTest, Flush) { +TEST(RamFileBlockCacheTest, Flush) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, char* buffer, size_t* bytes_transferred) { @@ -505,7 +505,7 @@ TEST(FileBlockCacheTest, Flush) { *bytes_transferred = n; return Status::OK(); }; - FileBlockCache cache(16, 32, 0, fetcher); + RamFileBlockCache cache(16, 32, 0, fetcher); std::vector out; TF_EXPECT_OK(ReadCache(&cache, "", 0, 16, &out)); TF_EXPECT_OK(ReadCache(&cache, "", 0, 16, &out)); -- GitLab From 45f56944c862a8c67c34efedcee501f365a08aee Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Fri, 2 Mar 2018 12:25:13 -0800 Subject: [PATCH 268/311] FreezeSavedModel support for ResourceVariables. 
PiperOrigin-RevId: 187653676 --- tensorflow/cc/tools/BUILD | 1 + tensorflow/cc/tools/freeze_saved_model.cc | 55 +++- .../cc/tools/freeze_saved_model_test.cc | 268 +++++++++++------- 3 files changed, 211 insertions(+), 113 deletions(-) diff --git a/tensorflow/cc/tools/BUILD b/tensorflow/cc/tools/BUILD index 97f66e79b8..f413a5cc52 100644 --- a/tensorflow/cc/tools/BUILD +++ b/tensorflow/cc/tools/BUILD @@ -32,6 +32,7 @@ tf_cc_test( deps = [ ":freeze_saved_model", "//tensorflow/cc:cc_ops", + "//tensorflow/cc:resource_variable_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:framework_internal", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/cc/tools/freeze_saved_model.cc b/tensorflow/cc/tools/freeze_saved_model.cc index ddf372cdef..4ddddcb586 100644 --- a/tensorflow/cc/tools/freeze_saved_model.cc +++ b/tensorflow/cc/tools/freeze_saved_model.cc @@ -75,16 +75,13 @@ void GetNodeNameToNodeDefMap( // variable nodes to convert. void GetReachableNodesAndVariables( GraphDef* graph_def, const std::unordered_set& outputs, + const std::unordered_map& name_to_node_map, std::unordered_set* reachable_node_names, std::unordered_set* variable_node_names) { // TODO(suharshs): Add support for ResourceVariables. static const std::unordered_set* kVariableTypes = - new std::unordered_set({"Variable", "VariableV2"}); - // name_to_node_map is needed to get the inputs from the NodeDef corresponding - // the a string node name. These inputs are used when doing our backwards - // traversal. - std::unordered_map name_to_node_map; - GetNodeNameToNodeDefMap(graph_def, &name_to_node_map); + new std::unordered_set({"Variable", "VariableV2", "VarHandleOp"}); + std::queue nodes_to_visit; for (const string& tensor_name : outputs) { // We need to strip off the tensor part to get the node name. 
@@ -99,7 +96,7 @@ void GetReachableNodesAndVariables( continue; } reachable_node_names->insert(node_name); - NodeDef* node = name_to_node_map[node_name]; + NodeDef* node = name_to_node_map.at(node_name); if (kVariableTypes->find(node->op()) != kVariableTypes->end()) { variable_node_names->insert(node->name()); } @@ -111,7 +108,9 @@ void GetReachableNodesAndVariables( // Gets a map from variable name to variable value. Status GetVariableNameToTensorMap( - Session* session, std::unordered_set variable_names_set, + Session* session, + const std::unordered_map& name_to_node_map, + std::unordered_set variable_names_set, std::unordered_map* variable_name_to_value_map) { if (variable_names_set.empty()) { return Status::OK(); @@ -120,8 +119,14 @@ Status GetVariableNameToTensorMap( std::vector tensor_names; for (const string& node_name : variable_names_set) { variable_names.push_back(node_name); - // We need to run tensors, so append ":0". - tensor_names.push_back(node_name + ":0"); + NodeDef* node_def = name_to_node_map.at(node_name); + if (node_def->op() == "VarHandleOp") { + // If this is a resource variable, we have to run the corresponding + // ReadVariableOp. + tensor_names.push_back(node_name + "/Read/ReadVariableOp:0"); + } else { + tensor_names.push_back(node_name + ":0"); + } } std::vector outputs; TF_RETURN_IF_ERROR( @@ -143,6 +148,15 @@ void ConvertVariableToConstant(const NodeDef& variable_node, (*const_node->mutable_attr())["value"].mutable_tensor()); } +// Converts a ReadVariableOp NodeDef to an Identity NodeDef. +void ConvertReadVariableOpToIdentity(const NodeDef& node, + NodeDef* identity_node) { + identity_node->set_name(node.name()); + identity_node->set_op("Identity"); + (*identity_node->mutable_attr())["T"] = node.attr().at("dtype"); + identity_node->add_input(node.input(0)); +} + // Freezes the subgraph of all nodes needed by `outputs`. 
Status FreezeGraphDef(const SavedModelBundle& saved_model_bundle, const std::unordered_set& outputs, @@ -155,14 +169,19 @@ Status FreezeGraphDef(const SavedModelBundle& saved_model_bundle, if (graph_def.node_size() == 0) { return Status::OK(); } + // name_to_node_map is needed to get the inputs from the NodeDef corresponding + // to a string node name. These inputs are used when doing our backwards + // traversal. + std::unordered_map name_to_node_map; + GetNodeNameToNodeDefMap(&graph_def, &name_to_node_map); std::unordered_set reachable_node_names; std::unordered_set variable_node_names; - GetReachableNodesAndVariables(&graph_def, outputs, &reachable_node_names, - &variable_node_names); + GetReachableNodesAndVariables(&graph_def, outputs, name_to_node_map, + &reachable_node_names, &variable_node_names); std::unordered_map variable_to_value_map; - TF_RETURN_IF_ERROR( - GetVariableNameToTensorMap(saved_model_bundle.session.get(), - variable_node_names, &variable_to_value_map)); + TF_RETURN_IF_ERROR(GetVariableNameToTensorMap( + saved_model_bundle.session.get(), name_to_node_map, variable_node_names, + &variable_to_value_map)); // We copy the nodes in the same order they were in the original graph_def. for (const NodeDef& node : graph_def.node()) { if (reachable_node_names.find(node.name()) == reachable_node_names.end()) { @@ -171,6 +190,12 @@ Status FreezeGraphDef(const SavedModelBundle& saved_model_bundle, if (variable_node_names.find(node.name()) != variable_node_names.end()) { ConvertVariableToConstant(node, variable_to_value_map[node.name()], frozen_graph_def->add_node()); + } else if (node.op() == "ReadVariableOp" && + variable_node_names.find(node.input(0)) != + variable_node_names.end()) { + // If the node is a ReadVariableOp, its input VarHandleOp will be + // converted to a Constant, so we will need to convert it to an Identity.
+ ConvertReadVariableOpToIdentity(node, frozen_graph_def->add_node()); } else { // If the node isn't a variable, just copy the node as-is. *frozen_graph_def->add_node() = node; diff --git a/tensorflow/cc/tools/freeze_saved_model_test.cc b/tensorflow/cc/tools/freeze_saved_model_test.cc index 52a81a5028..cd35fd3b95 100644 --- a/tensorflow/cc/tools/freeze_saved_model_test.cc +++ b/tensorflow/cc/tools/freeze_saved_model_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/cc/tools/freeze_saved_model.h" +#include "tensorflow/cc/ops/resource_variable_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/function_testlib.h" #include "tensorflow/core/framework/graph.pb.h" @@ -113,6 +114,160 @@ class FreezeTest : public ::testing::Test { test::ExpectTensorEqual(unfrozen_outputs[0], frozen_outputs[0]); } + + void TestFreezeGraphWithoutDependentVariables(bool use_resource) { + // Test freezing a graph with variables that are not needed by the outputs + // in the SignatureDef. The resulting graph shouldn't be frozen, but + // non-dependent nodes should be pruned. + SavedModelBundle saved_model_bundle; + GraphDef graph_def; + Scope scope = Scope::NewRootScope(); + Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); + Output b = ops::Const(scope.WithOpName("b"), 10.0f, {}); + Output c = ops::Mul(scope.WithOpName("c"), a, b); + if (use_resource) { + Output var = + ops::VarHandleOp(scope.WithOpName("var"), DataType::DT_FLOAT, {}); + Output read_var = ops::ReadVariableOp( + scope.WithOpName("var/Read/ReadVariableOp"), var, DataType::DT_FLOAT); + auto assign = ops::AssignVariableOp(scope.WithOpName("assign"), var, a); + } else { + Output var = + ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); + Output assign = ops::Assign(scope.WithOpName("assign"), var, a); + } + + TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); + // "c" isnt dependent on the variable, so nothing should be frozen. 
+ TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( + graph_def, {"c:0"}, "assign", &saved_model_bundle)); + + GraphDef frozen_graph_def; + std::unordered_set inputs; + std::unordered_set outputs; + TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, + &inputs, &outputs)); + + GraphDef expected_graph_def; + Scope expected_scope = Scope::NewRootScope(); + Output expected_a = ops::Const(expected_scope.WithOpName("a"), 10.0f, {}); + Output expected_b = ops::Const(expected_scope.WithOpName("b"), 10.0f, {}); + Output expected_c = + ops::Mul(expected_scope.WithOpName("c"), expected_a, expected_b); + TF_ASSERT_OK(expected_scope.ToGraphDef(&expected_graph_def)); + + GraphDefEqual(frozen_graph_def, expected_graph_def); + + RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), + frozen_graph_def, "c:0"); + } + + void TestFreezeGraphWithDependentVariables(bool use_resource) { + // Test freezing a graph with variables that are needed by outputs in the + // SignatureDef. The variables should be frozen. + SavedModelBundle saved_model_bundle; + GraphDef graph_def; + Scope scope = Scope::NewRootScope(); + Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); + Output read_var; + if (use_resource) { + Output var = + ops::VarHandleOp(scope.WithOpName("var"), DataType::DT_FLOAT, {}); + read_var = ops::ReadVariableOp( + scope.WithOpName("var/Read/ReadVariableOp"), var, DataType::DT_FLOAT); + auto assign = ops::AssignVariableOp(scope.WithOpName("assign"), var, a); + } else { + Output read_var = + ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); + Output assign = ops::Assign(scope.WithOpName("assign"), read_var, a); + } + Output c = ops::Mul(scope.WithOpName("c"), a, read_var); + TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); + // "c" depends on the variable, so the variable should be frozen.
+ TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( + graph_def, {"c:0"}, "assign", &saved_model_bundle)); + + GraphDef frozen_graph_def; + std::unordered_set inputs; + std::unordered_set outputs; + TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, + &inputs, &outputs)); + + // If using normal variables there should be 3 nodes in the resulting + // graph_def. If using resource variables there should be 4 nodes in the + // resulting graph_def. + // In both cases, none should be variables. + size_t expected_nodes = use_resource ? 4 : 3; + EXPECT_EQ(frozen_graph_def.node_size(), expected_nodes); + for (const NodeDef& node : frozen_graph_def.node()) { + EXPECT_NE(node.op(), "Variable") << node.name(); + EXPECT_NE(node.op(), "VariableV2") << node.name(); + EXPECT_NE(node.op(), "VarHandleOp") << node.name(); + EXPECT_NE(node.op(), "ReadVariableOp") << node.name(); + } + + RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), + frozen_graph_def, "c:0"); + } + + void TestFreezeGraphWithAndWithoutDependentVariables(bool use_resource) { + // Test freezing a graph with some variables that are needed and not needed + // by + // the outputs in the SignatureDef. The resulting graph should only freeze + // dependent variables. 
+ SavedModelBundle saved_model_bundle; + GraphDef graph_def; + Scope scope = Scope::NewRootScope(); + Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); + Output read_var; + + if (use_resource) { + Output var = + ops::VarHandleOp(scope.WithOpName("var"), DataType::DT_FLOAT, {}); + read_var = ops::ReadVariableOp( + scope.WithOpName("var/Read/ReadVariableOp"), var, DataType::DT_FLOAT); + auto assign = ops::AssignVariableOp(scope.WithOpName("assign"), var, a); + Output var_1 = + ops::VarHandleOp(scope.WithOpName("var_1"), DataType::DT_FLOAT, {}); + Output read_var_1 = + ops::ReadVariableOp(scope.WithOpName("var_1/Read/ReadVariableOp"), + var, DataType::DT_FLOAT); + auto assign_1 = + ops::AssignVariableOp(scope.WithOpName("assign_1"), var_1, a); + } else { + read_var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); + Output assign = ops::Assign(scope.WithOpName("assign"), read_var, a); + Output var_1 = + ops::Variable(scope.WithOpName("var_1"), {}, DataType::DT_FLOAT); + Output assign_1 = ops::Assign(scope.WithOpName("assign_1"), var_1, a); + } + + Output c = ops::Mul(scope.WithOpName("c"), a, read_var); + TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); + // "c" isnt dependent on the variable, so nothing should be frozen. + TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( + graph_def, {"c:0"}, "assign", &saved_model_bundle)); + + GraphDef frozen_graph_def; + std::unordered_set inputs; + std::unordered_set outputs; + TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, + &inputs, &outputs)); + + // There should be 3 nodes in the resulting graph_def, and none should be + // variables. + size_t expected_nodes = use_resource ? 
4 : 3; + EXPECT_EQ(frozen_graph_def.node_size(), expected_nodes); + for (const NodeDef& node : frozen_graph_def.node()) { + EXPECT_NE(node.op(), "Variable") << node.name(); + EXPECT_NE(node.op(), "VariableV2") << node.name(); + EXPECT_NE(node.op(), "VarHandleOp") << node.name(); + EXPECT_NE(node.op(), "ReadVariableOp") << node.name(); + } + + RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), + frozen_graph_def, "c:0"); + } }; TEST_F(FreezeTest, InputsAndOutputsSingleSignatureDef) { @@ -196,111 +351,28 @@ TEST_F(FreezeTest, GraphDefWithNoVariables) { GraphDefEqual(frozen_graph_def, graph_def); } -TEST_F(FreezeTest, GraphDefWithVariablesNotNeededByOutputs) { - // Test freezing a graph with variables that are not needed by the outputs in - // the SignatureDef. The resulting graph shouldn't be frozen, but - // non-dependent nodes should be pruned. - SavedModelBundle saved_model_bundle; - GraphDef graph_def; - Scope scope = Scope::NewRootScope(); - Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); - Output b = ops::Const(scope.WithOpName("b"), 10.0f, {}); - Output c = ops::Mul(scope.WithOpName("c"), a, b); - Output var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); - Output assign = ops::Assign(scope.WithOpName("assign"), var, a); - TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); - // "c" isnt dependent on the variable, so nothing should be frozen. 
- TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( - graph_def, {"c:0"}, assign.name(), &saved_model_bundle)); - - GraphDef frozen_graph_def; - std::unordered_set inputs; - std::unordered_set outputs; - TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs, - &outputs)); - - GraphDef expected_graph_def; - Scope expected_scope = Scope::NewRootScope(); - Output expected_a = ops::Const(expected_scope.WithOpName("a"), 10.0f, {}); - Output expected_b = ops::Const(expected_scope.WithOpName("b"), 10.0f, {}); - Output expected_c = - ops::Mul(expected_scope.WithOpName("c"), expected_a, expected_b); - TF_ASSERT_OK(expected_scope.ToGraphDef(&expected_graph_def)); - - GraphDefEqual(frozen_graph_def, expected_graph_def); - - RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), - frozen_graph_def, "c:0"); +TEST_F(FreezeTest, GraphDefWithoutDependentVariables) { + TestFreezeGraphWithoutDependentVariables(false); } -TEST_F(FreezeTest, GraphDefWithVariablesNeededByOutputs) { - // Test freezing a graph with variables that are needed by outputs in the - // SignatureDef. The variables should be frozen. - SavedModelBundle saved_model_bundle; - GraphDef graph_def; - Scope scope = Scope::NewRootScope(); - Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); - Output var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); - Output c = ops::Mul(scope.WithOpName("c"), a, var); - Output assign = ops::Assign(scope.WithOpName("assign"), var, a); - TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); - // "c" isnt dependent on the variable, so nothing should be frozen. 
- TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( - graph_def, {"c:0"}, assign.name(), &saved_model_bundle)); - - GraphDef frozen_graph_def; - std::unordered_set inputs; - std::unordered_set outputs; - TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs, - &outputs)); - - // There should be 3 nodes in the resulting graph_def, and none should be - // variables. - EXPECT_EQ(frozen_graph_def.node_size(), 3); - for (const NodeDef& node : frozen_graph_def.node()) { - EXPECT_NE(node.op(), "Variable") << node.name(); - EXPECT_NE(node.op(), "VariableV2") << node.name(); - } - - RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), - frozen_graph_def, "c:0"); +TEST_F(FreezeTest, GraphDefWithoutDependentResourceVariables) { + TestFreezeGraphWithoutDependentVariables(true); } -TEST_F(FreezeTest, GraphDefWithVariablesNeededAndNotNeededByOutputs) { - // Test freezing a graph with some variables that are needed and not needed by - // the outputs in the SignatureDef. The resulting graph should only freeze - // dependent variables. - SavedModelBundle saved_model_bundle; - GraphDef graph_def; - Scope scope = Scope::NewRootScope(); - Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); - Output var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); - Output c = ops::Mul(scope.WithOpName("c"), a, var); - Output assign = ops::Assign(scope.WithOpName("assign"), var, a); - Output var_1 = - ops::Variable(scope.WithOpName("var_1"), {}, DataType::DT_FLOAT); - Output assign_1 = ops::Assign(scope.WithOpName("assign_1"), var, a); - TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); - // "c" isnt dependent on the variable, so nothing should be frozen. 
- TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( - graph_def, {"c:0"}, assign.name(), &saved_model_bundle)); +TEST_F(FreezeTest, GraphDefWithDependentVariables) { + TestFreezeGraphWithDependentVariables(false); +} - GraphDef frozen_graph_def; - std::unordered_set inputs; - std::unordered_set outputs; - TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs, - &outputs)); +TEST_F(FreezeTest, GraphDefWithDependentResourceVariables) { + TestFreezeGraphWithDependentVariables(true); +} - // There should be 3 nodes in the resulting graph_def, and none should be - // variables. - EXPECT_EQ(frozen_graph_def.node_size(), 3); - for (const NodeDef& node : frozen_graph_def.node()) { - EXPECT_NE(node.op(), "Variable") << node.name(); - EXPECT_NE(node.op(), "VariableV2") << node.name(); - } +TEST_F(FreezeTest, GraphDefWithAndWithoutDependentVariables) { + TestFreezeGraphWithAndWithoutDependentVariables(false); +} - RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), - frozen_graph_def, "c:0"); +TEST_F(FreezeTest, GraphDefWithAndWithoutDependentResourceVariables) { + TestFreezeGraphWithAndWithoutDependentVariables(true); } } // namespace -- GitLab From faab0cf5407dcf11967371b51b97f8eef6964a35 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Fri, 2 Mar 2018 12:33:40 -0800 Subject: [PATCH 269/311] Exclude flaky tests for cuda_on_cpu. 
PiperOrigin-RevId: 187654568 --- tensorflow/contrib/data/python/kernel_tests/BUILD | 1 + tensorflow/contrib/eager/python/examples/spinn/BUILD | 5 ++++- tensorflow/python/BUILD | 6 +++++- tensorflow/python/feature_column/BUILD | 5 ++++- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 82cd276ce8..10cb05ece1 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -168,6 +168,7 @@ py_test( srcs = ["interleave_dataset_op_test.py"], srcs_version = "PY2AND3", tags = [ + "no_cuda_on_cpu_tap", "no_oss", "no_pip", ], diff --git a/tensorflow/contrib/eager/python/examples/spinn/BUILD b/tensorflow/contrib/eager/python/examples/spinn/BUILD index a1f8a759e2..98d01ad1d5 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/BUILD +++ b/tensorflow/contrib/eager/python/examples/spinn/BUILD @@ -38,5 +38,8 @@ cuda_py_test( "//tensorflow/python:client_testlib", "//tensorflow/python:framework_test_lib", ], - tags = ["no_pip"], # because spinn.py is under third_party/. + tags = [ + "no_cuda_on_cpu_tap", + "no_pip", # because spinn.py is under third_party/. 
+ ], ) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index cb54cebf0f..f282abb0a5 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3945,7 +3945,10 @@ py_test( size = "small", srcs = ["training/checkpoint_utils_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], + tags = [ + "no_cuda_on_cpu_tap", + "no_windows", + ], deps = [ ":client", ":client_testlib", @@ -4739,6 +4742,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "grappler", + "no_cuda_on_cpu_tap", "no_pip", ], deps = [ diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index a758f8a4fc..238a90b67d 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -74,7 +74,10 @@ py_test( srcs = ["feature_column_test.py"], data = [":vocabulary_testdata"], srcs_version = "PY2AND3", - tags = ["no_pip"], + tags = [ + "no_cuda_on_cpu_tap", + "no_pip", + ], deps = [ ":feature_column", ":feature_column_py", -- GitLab From 85daa2e4553e49ca6ab2fbb412b18c23b5399524 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 2 Mar 2018 12:43:22 -0800 Subject: [PATCH 270/311] TFTS: Switch more variables to ResourceVariables to avoid race conditions The LSTM example test was a bit flaky. 
PiperOrigin-RevId: 187655714 --- .../contrib/timeseries/python/timeseries/head.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py index f0330bfbbd..8731b10923 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/head.py +++ b/tensorflow/contrib/timeseries/python/timeseries/head.py @@ -73,7 +73,10 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc def _train_ops(self, features): """Add training ops to the graph.""" - with variable_scope.variable_scope("model"): + with variable_scope.variable_scope( + "model", + # Use ResourceVariables to avoid race conditions. + use_resource=True): model_outputs = self.state_manager.define_loss( self.model, features, estimator_lib.ModeKeys.TRAIN) @@ -107,7 +110,7 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc def _evaluate_ops(self, features): """Add ops for evaluation (aka filtering) to the graph.""" - with variable_scope.variable_scope("model"): + with variable_scope.variable_scope("model", use_resource=True): model_outputs = self.state_manager.define_loss( self.model, features, estimator_lib.ModeKeys.EVAL) metrics = {} @@ -128,7 +131,7 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc def _predict_ops(self, features): """Add ops for prediction to the graph.""" - with variable_scope.variable_scope("model"): + with variable_scope.variable_scope("model", use_resource=True): prediction = self.model.predict(features=features) prediction[feature_keys.PredictionResults.TIMES] = features[ feature_keys.PredictionFeatures.TIMES] @@ -137,7 +140,7 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc def _serving_ops(self, features): """Add ops for serving to the graph.""" - with variable_scope.variable_scope("model"): + with variable_scope.variable_scope("model", 
use_resource=True): prediction_outputs = self.model.predict(features=features) with variable_scope.variable_scope("model", reuse=True): filtering_outputs = self.state_manager.define_loss( -- GitLab From e0fac18b63e80963d42cb1e39243d84ae86ae01a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 12:58:08 -0800 Subject: [PATCH 271/311] Automated g4 rollback of changelist 187582263 PiperOrigin-RevId: 187657654 --- .../grappler/optimizers/memory_optimizer.cc | 23 +++++++------ .../grappler/optimizers/memory_optimizer.h | 10 +++--- .../grappler/optimizers/meta_optimizer.cc | 4 +-- .../core/protobuf/rewriter_config.proto | 19 ++++++----- .../python/grappler/memory_optimizer_test.py | 32 ++++++++++++++++++- 5 files changed, 58 insertions(+), 30 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index 694139fa50..27e9d2c78d 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -413,7 +413,7 @@ void RecomputeSubgraph( } void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_prefix, + const string& recomputation_targets_name_scope, GraphDef* graph, const GrapplerItem& item) { if (optimization_level != RewriterConfig::RECOMPUTATION_HEURISTICS && optimization_level != RewriterConfig::HEURISTICS && @@ -438,15 +438,14 @@ void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, feeds.insert(NodeName(feed.first)); } std::function is_target = - [&recomputation_targets_name_prefix](const NodeDef& node) { - // Nodes whose inputs we may want to recompute. Typically targets will - // be gradients (recomputation_targets_name_prefix="gradients/"), - // although the prefix is configurable since gradients may be created - // in a name scope. 
- // TODO(allenl): Use a static schedule - // (grappler::EstimateEarliestExecutionTimes) to recompute only nodes - // whose outputs will sit around for a while. - return node.name().find(recomputation_targets_name_prefix) == 0; + [&recomputation_targets_name_scope](const NodeDef& node) { + // Nodes whose inputs we may want to recompute. This matches node names + // that contain recomputation_targets_name_scope as a name scope, + // meaning it either begins with or contains the name scope. + // Defaults to "gradients/" which will match any node names that begins + // with "gradients/" or contains "/gradients/". + return node.name().find(recomputation_targets_name_scope) == 0 || + node.name().find("/" + recomputation_targets_name_scope) != -1; }; if (optimization_level == RewriterConfig::RECOMPUTATION_HEURISTICS || @@ -1225,8 +1224,8 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph = item.graph; RecomputationRewritingPass(optimization_level_, - recomputation_targets_name_prefix_, - optimized_graph, item); + recomputation_targets_name_scope_, optimized_graph, + item); GrapplerItem optimized_item(item, std::move(*optimized_graph)); std::unordered_set skip_list; diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.h b/tensorflow/core/grappler/optimizers/memory_optimizer.h index c3dd0c45c6..5c555a2674 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.h +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.h @@ -27,14 +27,14 @@ class MemoryOptimizer : public GraphOptimizer { public: // optimization_level: Controls the level of autonomy for the memory // optimizer. See RewriterConfig::memory_optimization. - // recomputation_targets_name_prefix: Name prefix for potential outputs of + // recomputation_targets_name_scope: Name scope for potential outputs of // recomputations. See - // RewriterConfig::memory_optimizer_target_node_name_prefix. 
+ // RewriterConfig::memory_optimizer_target_node_name_scope. explicit MemoryOptimizer( RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_prefix = "gradients/") + const string& recomputation_targets_name_scope = "gradients/") : optimization_level_(optimization_level), - recomputation_targets_name_prefix_(recomputation_targets_name_prefix) {} + recomputation_targets_name_scope_(recomputation_targets_name_scope) {} ~MemoryOptimizer() override {} string name() const override { return "memory_optimizer"; }; @@ -47,7 +47,7 @@ class MemoryOptimizer : public GraphOptimizer { private: RewriterConfig::MemOptType optimization_level_; - string recomputation_targets_name_prefix_; + string recomputation_targets_name_scope_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 72d7b94dc8..fff1e354f4 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -119,7 +119,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, std::unique_ptr(new LayoutOptimizer())); } if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { - if (cfg_.memory_optimizer_target_node_name_prefix().empty()) { + if (cfg_.memory_optimizer_target_node_name_scope().empty()) { optimizers.push_back(std::unique_ptr( // Use the default target node name prefix "gradients/" new MemoryOptimizer(cfg_.memory_optimization()))); @@ -127,7 +127,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back( std::unique_ptr(new MemoryOptimizer( cfg_.memory_optimization(), - cfg_.memory_optimizer_target_node_name_prefix()))); + cfg_.memory_optimizer_target_node_name_scope()))); } } if (cfg_.auto_parallel().enable()) { diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 
9ebf217811..0ccf2149f2 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -78,16 +78,15 @@ message RewriterConfig { // effect on manually requested memory optimization passes in the optimizers // field. MemOptType memory_optimization = 4; - // The prefix for nodes which are valid outputs of recomputations. Inputs to - // nodes with this name prefix may be recomputed (subject either to manual - // annotation of those input nodes or to manual annotation and heuristics - // depending on memory_optimization), but the prefixed nodes themselves will - // not be recomputed. Typically this will be "gradients/", indicating that - // activations from the forward pass of a graph may be recomputed as inputs to - // gradients, but may be adjusted if gradients are inside a name scope or if - // inputs to non-gradients should be recomputed. Defaults to "gradients/" if - // empty or not set. - string memory_optimizer_target_node_name_prefix = 6; + // A node name scope for node names which are valid outputs of recompuations. + // Inputs to nodes that match this scope may be recomputed (subject either to + // manual annotation of those input nodes or to manual annotation and + // heuristics depending on memory_optimization), but the nodes themselves will + // not be recomputed. This matches any sub-scopes as well, meaning the scope + // can appear not just as a top-level scope. For example, if the value is + // "gradients/", the default, it will match node name "gradients/foo", + // "foo/gradients/bar", but not "foo_gradients/" + string memory_optimizer_target_node_name_scope = 6; // Configures AutoParallel optimization passes either through the // meta-optimizer or when manually specified through the optimizers field. 
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py index 948911f099..4df959ce04 100644 --- a/tensorflow/python/grappler/memory_optimizer_test.py +++ b/tensorflow/python/grappler/memory_optimizer_test.py @@ -162,7 +162,8 @@ class MemoryOptimizerRecomputeTest(test.TestCase): arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, memory_optimization=rewriter_config_pb2.RewriterConfig. RECOMPUTATION_HEURISTICS, - memory_optimizer_target_node_name_prefix='optimizer/gradients/'), + # Checks that name scope "gradients/" also match sub-scope. + memory_optimizer_target_node_name_scope='gradients/'), original_metagraph) self.assertGreater( len(rewritten_graph_def.node), @@ -176,6 +177,35 @@ class MemoryOptimizerRecomputeTest(test.TestCase): len([node for node in rewritten_graph_def.node if 'Recomputed/' in node.name])) + def testRewritingNameScopedGradientNamesScope(self): + """Tests that rewriting occurs with non-standard gradient names.""" + (original_metagraph, _, _, + _) = self._GetMetaGraph(optimizer_scope_name='foo/bar') + rewritten_graph_def = tf_optimizer.OptimizeGraph( + rewriter_config_pb2.RewriterConfig( + disable_model_pruning=True, + constant_folding=rewriter_config_pb2.RewriterConfig.OFF, + dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF, + layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF, + arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, + memory_optimization=rewriter_config_pb2.RewriterConfig. + RECOMPUTATION_HEURISTICS, + # This should not match anything. 
+ memory_optimizer_target_node_name_scope='r/gradients/'), + original_metagraph) + self.assertEqual( + len(rewritten_graph_def.node), len(original_metagraph.graph_def.node)) + self.assertEqual(0, + len([ + node for node in original_metagraph.graph_def.node + if 'Recomputed/' in node.name + ])) + self.assertEqual(0, + len([ + node for node in rewritten_graph_def.node + if 'Recomputed/' in node.name + ])) + def _GetMemoryOptimizerSessionConfig(self): rewrite_options = rewriter_config_pb2.RewriterConfig( disable_model_pruning=True, -- GitLab From 6da9a6a739ac9a49dcf85617ed7bccfe4bccff4c Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 2 Mar 2018 13:03:42 -0800 Subject: [PATCH 272/311] Make tfe.Metrics Checkpointable Same principle as Layers: use add_variable to add a dependency on any variables created. I've ignored the global/local distinction, since it makes more sense for users to control saving by either adding a dependency on the Metric or not. PiperOrigin-RevId: 187658433 --- tensorflow/contrib/eager/python/BUILD | 1 + .../contrib/eager/python/metrics_impl.py | 23 ++++++++++------ .../contrib/eager/python/metrics_test.py | 27 +++++++++++++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index e8c514c114..6fb8287030 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -117,6 +117,7 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ + "//tensorflow/contrib/eager/python:checkpointable_utils", "//tensorflow/contrib/summary:summary_ops", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py index 5571e77c70..a34c4f758a 100644 --- a/tensorflow/contrib/eager/python/metrics_impl.py +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -30,12 +30,12 @@ 
from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope - +from tensorflow.python.training import checkpointable _to_replace = re.compile("[^A-Za-z0-9.]") -class Metric(object): +class Metric(checkpointable.CheckpointableBase): """A metric holds state for aggregating statistics over an evaluation run. Example use with eager execution: @@ -254,14 +254,21 @@ class Metric(object): else: collections = [ops.GraphKeys.LOCAL_VARIABLES] collections += [ops.GraphKeys.METRIC_VARIABLES] - v = variable_scope.get_variable( - name, - shape, - dtype, - initializer, + # Variables are Checkpointable dependencies of Metrics regardless of the + # global/local distinction. Users can avoid saving variables by not adding a + # dependency on the Metric. + v = self._add_variable_with_custom_getter( + name=name, + shape=shape, + dtype=dtype, + initializer=initializer, trainable=False, collections=collections, - use_resource=True) + use_resource=True, + getter=variable_scope.get_variable, + # Raise duplicate variable exceptions from get_variable rather than + # Checkpointable. 
+ overwrite=True) self._vars.append(v) if context.in_eager_mode(): self._initial_values[v] = v.value() diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py index c9106294dc..6b5450ba89 100644 --- a/tensorflow/contrib/eager/python/metrics_test.py +++ b/tensorflow/contrib/eager/python/metrics_test.py @@ -18,8 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import tempfile +from tensorflow.contrib.eager.python import checkpointable_utils from tensorflow.contrib.eager.python import metrics from tensorflow.contrib.summary import summary_ops from tensorflow.contrib.summary import summary_test_util @@ -206,6 +208,31 @@ class MetricsTest(test.TestCase): self.assertAllEqual(m2.result().eval(), 2.0) self.assertAllEqual(m1.result().eval(), 1.0) + @test_util.run_in_graph_and_eager_modes() + def testSaveRestore(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + mean = metrics.Mean() + checkpoint = checkpointable_utils.Checkpoint(mean=mean) + mean.build() + mean._built = True + self.evaluate(mean.init_variables()) + self.evaluate(mean(100.)) + self.evaluate(mean(200.)) + save_path = checkpoint.save(checkpoint_prefix) + self.evaluate(mean(1000.)) + checkpoint.restore(save_path).assert_consumed().run_restore_ops() + self.evaluate(mean(300.)) + self.assertAllEqual(200., self.evaluate(mean.value())) + + restore_mean = metrics.Mean() + restore_checkpoint = checkpointable_utils.Checkpoint(mean=restore_mean) + status = restore_checkpoint.restore(save_path) + restore_update = restore_mean(300.) 
+ status.assert_consumed().run_restore_ops() + self.evaluate(restore_update) + self.assertAllEqual(200., self.evaluate(restore_mean.value())) + self.assertEqual(3, self.evaluate(restore_mean.denom)) if __name__ == "__main__": test.main() -- GitLab From 1e2c2f1cddd52ed86f8d5d7f10faa6498f13dded Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 13:32:35 -0800 Subject: [PATCH 273/311] Add /learning/tfx/ to the visibility group of tensorflow/compiler/jit. PiperOrigin-RevId: 187661883 --- tensorflow/compiler/jit/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index c7c9e9bd7a..955d12dc20 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -29,7 +29,10 @@ load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured") # Target that bundles up the XLA CPU and GPU JIT devices. cc_library( name = "jit", - visibility = [":friends"], + visibility = [ + ":friends", + "//learning/tfx:__subpackages__", + ], deps = [ ":xla_cpu_device", ":xla_cpu_jit", -- GitLab From 4b038da7006c81e3e6cd542a7015d4a84d5c2385 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 2 Mar 2018 13:37:41 -0800 Subject: [PATCH 274/311] Make shape inference error messages more consistent. 
PiperOrigin-RevId: 187662562 --- .../compiler/xla/service/shape_inference.cc | 410 +++++++++--------- .../xla/service/shape_inference_test.cc | 59 ++- .../xla/tests/broadcast_simple_test.cc | 6 +- tensorflow/compiler/xla/tests/concat_test.cc | 2 +- tensorflow/compiler/xla/tests/map_test.cc | 2 +- 5 files changed, 236 insertions(+), 243 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 607a672025..c54cb3b48d 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -169,11 +169,11 @@ bool AllUnique(tensorflow::gtl::ArraySlice slice) { tensorflow::Status ExpectNotTupleOrOpaque(const Shape& shape, tensorflow::StringPiece op_type) { if (ShapeUtil::IsTuple(shape)) { - return InvalidArgument("Expected non-tuple argument for %s. Got: %s", + return InvalidArgument("Expected non-tuple argument for %s, but got %s.", op_type.ToString().c_str(), ShapeUtil::HumanString(shape).c_str()); } else if (ShapeUtil::IsOpaque(shape)) { - return InvalidArgument("Expected non-opaque argument for %s. Got: %s", + return InvalidArgument("Expected non-opaque argument for %s, but got %s.", op_type.ToString().c_str(), ShapeUtil::HumanString(shape).c_str()); } else { @@ -193,8 +193,7 @@ tensorflow::Status VerifyReducerShape(const ProgramShape& reducer_shape, const Shape& accumulator_shape = reducer_shape.result(); if (ShapeUtil::Rank(accumulator_shape) != 0) { - return Unimplemented( - "Reduction function currently must have rank-0 result."); + return InvalidArgument("Reduction function must have rank 0."); } // Check that the accumulator can be passed in as the first argument. 
@@ -235,8 +234,8 @@ tensorflow::Status VerifyReducerShape(const ProgramShape& reducer_shape, if (!ShapeUtil::CompatibleIgnoringFpPrecision(accumulator_shape, reducer_shape.parameters(1))) { return InvalidArgument( - "Reduction function's second parameter shape currently must " - "match the result shape. Got %s vs %s", + "Reduction function's second parameter shape must " + "match the result shape, but got %s vs %s.", ShapeUtil::HumanString(reducer_shape.parameters(1)).c_str(), ShapeUtil::HumanString(accumulator_shape).c_str()); } @@ -258,29 +257,29 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, for (int64 i = 0; i < window.dimensions_size(); ++i) { const auto& dim = window.dimensions(i); if (dim.size() <= 0) { - return InvalidArgument("Window has a non-positive dimension. Window: %s", + return InvalidArgument("Window %s has a non-positive dimension.", window.DebugString().c_str()); } if (dim.stride() <= 0) { - return InvalidArgument("Window has a non-positive stride. Window: %s", + return InvalidArgument("Window %s has a non-positive stride.", window.DebugString().c_str()); } if (!allow_negative_padding && dim.padding_low() < 0) { - return InvalidArgument("Window has a negative low padding. Window: %s", + return InvalidArgument("Window %s has a negative low padding.", window.DebugString().c_str()); } if (!allow_negative_padding && dim.padding_high() < 0) { - return InvalidArgument("Window has a negative high padding. Window: %s", + return InvalidArgument("Window %s has a negative high padding.", window.DebugString().c_str()); } if (dim.base_dilation() < 1) { return InvalidArgument( - "Window has a non-positive base area dilation factor. Window: %s", + "Window %s has a non-positive base area dilation factor.", window.DebugString().c_str()); } if (dim.window_dilation() < 1) { return InvalidArgument( - "Window has a non-positive window dilation factor. 
Window: %s", + "Window %s has a non-positive window dilation factor.", window.DebugString().c_str()); } @@ -320,8 +319,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, case UNOP_CEIL: if (!ShapeUtil::ElementIsFloating(arg)) { return InvalidArgument( - "expected element type in shape to be floating for floor/ceil " - "operation; got %s", + "Expected element type in shape to be floating for floor/ceil " + "operation; got %s.", PrimitiveType_Name(arg.element_type()).c_str()); } return arg; @@ -333,8 +332,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, if (!ShapeUtil::ElementIsFloating(arg) && !ShapeUtil::ElementIsComplex(arg)) { return InvalidArgument( - "expected element type in shape to be floating or complex for " - "sin/cos/exp/log/tanh operation; got %s", + "Expected element type in shape to be floating or complex for " + "sin/cos/exp/log/tanh operation; got %s.", PrimitiveType_Name(arg.element_type()).c_str()); } return arg; @@ -342,8 +341,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, case UNOP_IMAG: if (!ShapeUtil::ElementIsComplex(arg)) { return InvalidArgument( - "expected element type in shape to be complex for real/imag " - "operation; got %s", + "Expected element type in shape to be complex for real/imag " + "operation; got %s.", PrimitiveType_Name(arg.element_type()).c_str()); } return ShapeUtil::ChangeElementType(arg, F32); @@ -363,8 +362,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, if (arg.element_type() != PRED && !primitive_util::IsIntegralType(arg.element_type())) { return InvalidArgument( - "expected pred or an integral element type in argument to not " - "operation; got %s", + "Expected pred or an integral element type in argument to Not " + "operation; got %s.", PrimitiveType_Name(arg.element_type()).c_str()); } return arg; @@ -372,8 +371,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, case UNOP_IS_FINITE: if (!ShapeUtil::ElementIsFloating(arg)) { return InvalidArgument( - 
"expected element type in shape to be floating point for IsFinite " - "operation; got %s", + "Expected element type in shape to be floating point for IsFinite " + "operation; got %s.", PrimitiveType_Name(arg.element_type()).c_str()); } return ShapeUtil::ChangeElementType(arg, PRED); @@ -389,10 +388,10 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, tensorflow::gtl::ArraySlice arg_shapes, const int64 dimension) { if (arg_shapes.empty()) { - return InvalidArgument("Concatenate expects at least one argument"); + return InvalidArgument("Concatenate expects at least one argument."); } if (dimension < 0 || dimension >= ShapeUtil::Rank(*arg_shapes[0])) { - return InvalidArgument("dimension to concatenate along out of bounds: %lld", + return InvalidArgument("Concatenate dimension out of bounds: %lld.", dimension); } const Shape* arg_shape = nullptr; @@ -408,14 +407,14 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, if (ShapeUtil::Rank(*arg_shape) != ShapeUtil::Rank(*shape)) { return InvalidArgument( "Cannot concatenate arrays with different ranks: %lld (%s) vs %lld " - "(%s)", + "(%s).", ShapeUtil::Rank(*arg_shape), ShapeUtil::HumanString(*arg_shape).c_str(), ShapeUtil::Rank(*shape), ShapeUtil::HumanString(*shape).c_str()); } if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(*arg_shape, *shape)) { return InvalidArgument( - "cannot concatenate arrays with different element types: %s vs %s", + "Cannot concatenate arrays with different element types: %s vs %s.", PrimitiveType_Name(arg_shape->element_type()).c_str(), PrimitiveType_Name(shape->element_type()).c_str()); } @@ -428,9 +427,9 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, // concatenating. 
} return InvalidArgument( - "cannot concatenate arrays that differ in dimensions other than " + "Cannot concatenate arrays that differ in dimensions other than " "the one being concatenated (the other array dimensions must be " - "the same): %s vs %s in dimension %lld", + "the same): %s vs %s in dimension %lld.", ShapeUtil::HumanString(*arg_shape).c_str(), ShapeUtil::HumanString(*shape).c_str(), dimension); } @@ -452,7 +451,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, if (primitive_util::IsComplexType(old_element_type) && !primitive_util::IsComplexType(new_element_type)) { return Unimplemented( - "Unsupported conversion from complex to real type: %s => %s", + "Conversion from complex to real type %s => %s is not implemented.", ShapeUtil::HumanString(operand_shape).c_str(), PrimitiveType_Name(new_element_type).c_str()); } @@ -461,7 +460,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, // future, by recursing into the tuple elements to check all sub-conversions // are valid. For now we just reject them, though. 
return InvalidArgument( - "cannot convert from or to tuple type; requested conversion: %s => %s", + "Convert does not allow tuples, so cannot convert from %s to %s.", ShapeUtil::HumanString(operand_shape).c_str(), PrimitiveType_Name(new_element_type).c_str()); } @@ -474,24 +473,23 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, auto old_element_type = operand_shape.element_type(); if (primitive_util::IsComplexType(old_element_type) != primitive_util::IsComplexType(new_element_type)) { - return Unimplemented( - "Unsupported conversion between real and complex types: %s => %s", - ShapeUtil::HumanString(operand_shape).c_str(), - PrimitiveType_Name(new_element_type).c_str()); + return InvalidArgument("Conversion from complex to real type %s => %s.", + ShapeUtil::HumanString(operand_shape).c_str(), + PrimitiveType_Name(new_element_type).c_str()); } if (ShapeUtil::IsTuple(operand_shape) || new_element_type == TUPLE) { // Note: we may want to support tuple conversions via this operation in the // future, by recursing into the tuple elements to check all sub-conversions // are valid. For now we just reject them, though. 
return InvalidArgument( - "cannot convert from or to tuple type; requested conversion: %s => %s", + "Cannot convert from or to tuple type; requested conversion: %s => %s.", ShapeUtil::HumanString(operand_shape).c_str(), PrimitiveType_Name(new_element_type).c_str()); } if (primitive_util::BitWidth(old_element_type) != primitive_util::BitWidth(new_element_type)) { return InvalidArgument( - "cannot bitcast types with different bit-widths: %s => %s", + "Cannot bitcast types with different bit-widths: %s => %s.", PrimitiveType_Name(old_element_type).c_str(), PrimitiveType_Name(new_element_type).c_str()); } @@ -504,20 +502,20 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, const int mantissa_bits) { if (!ShapeUtil::ElementIsFloating(operand_shape)) { return InvalidArgument( - "expected element type in shape to be floating point for " - "ReducePrecision operation; got %s", + "Expected element type in shape to be floating point for " + "ReducePrecision operation; got %s.", PrimitiveType_Name(operand_shape.element_type()).c_str()); } if (exponent_bits < 1) { // One exponent bit is necessary to distinguish 0 from infinity. Having // no exponent bits doesn't produce a sensible number, so we require at // least one. - return InvalidArgument("expected exponent_bits >= 1; got %d", + return InvalidArgument("Expected exponent_bits >= 1; got %d.", exponent_bits); } if (mantissa_bits < 0) { // A number with no mantissa bits is still meaningful, however. 
- return InvalidArgument("expected non-negative mantissa_bits; got %d", + return InvalidArgument("Expected non-negative mantissa_bits; got %d.", mantissa_bits); } return operand_shape; @@ -528,23 +526,23 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, const PaddingConfig& padding_config) { if (ShapeUtil::IsTuple(operand_shape)) { return InvalidArgument( - "pad operation does not support tuple-shape operands"); + "Pad operation does not support tuple-shape operands."); } if (!ShapeUtil::IsScalar(padding_value_shape)) { return InvalidArgument( - "pad operation does not support non-scalar padding values"); + "Pad operation does not support non-scalar padding values."); } if (ShapeUtil::Rank(operand_shape) != padding_config.dimensions_size()) { return InvalidArgument( "The rank of the operand and the padding configuration do not match: " - "%s vs %s", + "%s vs %s.", ShapeUtil::HumanString(operand_shape).c_str(), padding_config.ShortDebugString().c_str()); } if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(operand_shape, padding_value_shape)) { return InvalidArgument( - "the element types of the operands to pad do not match"); + "The element types of the operands to Pad do not match."); } std::vector dimensions(ShapeUtil::Rank(operand_shape)); for (int64 i = 0; i < operand_shape.dimensions_size(); ++i) { @@ -605,7 +603,7 @@ Status ValidateDotDimensionNumbers( lhs_batch_dimensions) || !dims_in_range(ShapeUtil::Rank(rhs), rhs_contracting_dimensions, rhs_batch_dimensions)) { - return InvalidArgument("A dimension number is out of range in dot: %s", + return InvalidArgument("A dimension number is out of range in Dot: %s.", dimension_numbers.DebugString().c_str()); } @@ -623,7 +621,7 @@ Status ValidateDotDimensionNumbers( if (!dims_unique(lhs_contracting_dimensions, lhs_batch_dimensions) || !dims_unique(rhs_contracting_dimensions, rhs_batch_dimensions)) { - return InvalidArgument("A dimension number is not unique in dot: %s", + return InvalidArgument("A 
dimension number is not unique in Dot: %s.", dimension_numbers.DebugString().c_str()); } @@ -641,8 +639,7 @@ Status ValidateDotDimensionNumbers( rhs_non_contracting_non_batch_dims < 0 || rhs_non_contracting_non_batch_dims > 1) { return InvalidArgument( - "batch and contracting dimension number mismatch " - "with rank "); + "Batch and contracting dimension number mismatch with rank."); } // Check that batch dimension numbers are ordered before all others, and @@ -654,7 +651,7 @@ Status ValidateDotDimensionNumbers( !std::equal(batch_dim_numbers.begin(), batch_dim_numbers.end(), rhs_batch_dimensions.begin())) { return InvalidArgument( - "batch dimension numbers must precede non-batch dimensions and be" + "Batch dimension numbers must precede non-batch dimensions and be" "monotonically increasing."); } @@ -671,22 +668,22 @@ Status ValidateDotDimensionNumbers( auto fail = [lhs, rhs](const string& addendum) -> Status { string message = tensorflow::strings::Printf( - "cannot infer shape for dot operation: %s %s", + "Cannot infer shape for dot operation: %s %s.", ShapeUtil::HumanString(lhs).c_str(), ShapeUtil::HumanString(rhs).c_str()); if (!addendum.empty()) { - message += ": " + addendum; + message += " " + addendum; } return InvalidArgument("%s", message.c_str()); }; // Check if both element types are the same. if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { - return fail("element types do not match"); + return fail("Element types do not match."); } if ((ShapeUtil::Rank(lhs) < 1) || (ShapeUtil::Rank(rhs) < 1)) { - return fail("dot only supports rank 1 or above."); + return fail("Dot only supports rank 1 or above."); } // Validate basic properties of dot dimension numbers. 
@@ -696,7 +693,7 @@ Status ValidateDotDimensionNumbers( if (dimension_numbers.lhs_contracting_dimensions_size() != dimension_numbers.rhs_contracting_dimensions_size() || dimension_numbers.lhs_contracting_dimensions_size() != 1) { - return fail("must specify one contracting dimension for both lhs and rhs."); + return fail("Must specify one contracting dimension for both lhs and rhs."); } // Check that contracting dimension sizes match. @@ -706,13 +703,13 @@ Status ValidateDotDimensionNumbers( dimension_numbers.rhs_contracting_dimensions(0); if (lhs.dimensions(lhs_contracting_dimension) != rhs.dimensions(rhs_contracting_dimension)) { - return fail("contracting dimension sizes do not match."); + return fail("Contracting dimension sizes do not match."); } // Check that number of batch dimensions match. if (dimension_numbers.lhs_batch_dimensions_size() != dimension_numbers.rhs_batch_dimensions_size()) { - return fail("must the same number of batch dimensions for lhs and rhs."); + return fail("Must the same number of batch dimensions for lhs and rhs."); } // Check that batch dimension numbers and sizes match. 
@@ -721,7 +718,7 @@ Status ValidateDotDimensionNumbers( dimension_numbers.rhs_batch_dimensions(i) || lhs.dimensions(dimension_numbers.lhs_batch_dimensions(i)) != rhs.dimensions(dimension_numbers.rhs_batch_dimensions(i))) { - return fail("batch dimension numbers and sizes must match for lhs/rhs."); + return fail("Batch dimension numbers and sizes must match for lhs/rhs."); } } @@ -770,10 +767,11 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } else if (rhs.dimensions(i) == 1) { output_dimensions[i] = lhs.dimensions(i); } else { - return InvalidArgument("binary op %s with incompatible shapes: %s and %s", - BinaryOperation_Name(operation).c_str(), - ShapeUtil::HumanString(lhs).c_str(), - ShapeUtil::HumanString(rhs).c_str()); + return InvalidArgument( + "Binary op %s with incompatible shapes: %s and %s.", + BinaryOperation_Name(operation).c_str(), + ShapeUtil::HumanString(lhs).c_str(), + ShapeUtil::HumanString(rhs).c_str()); } } return ShapeUtil::MakeShape(ShapeUtil::HigherPrecisionElementType(lhs, rhs), @@ -788,15 +786,15 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // Reject "magic" inference for binops on different shapes, requiring // the user to provide an explicit broadcast dimension in this case. // See b/25177275 for more details. 
- return InvalidArgument("automatic shape inference not supported: %s and %s", + return InvalidArgument("Automatic shape inference not supported: %s and %s", ShapeUtil::HumanString(smaller_shape).c_str(), ShapeUtil::HumanString(larger_shape).c_str()); } else if (broadcast_dimensions.size() != ShapeUtil::Rank(smaller_shape)) { return InvalidArgument( - "size of broadcast_dimensions has to match lower-rank operand's " + "Size of broadcast_dimensions has to match lower-rank operand's " "rank; " " lower-rank operand's rank is %lld, size of broadcast_dimensions is " - "%zu", + "%zu.", ShapeUtil::Rank(smaller_shape), broadcast_dimensions.size()); } @@ -846,13 +844,13 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( int64 dimension_to_match = broadcast_dimensions.at(i); if (dimension_to_match < 0) { return InvalidArgument( - "broadcast dimension number (%lld) cannot be negative", + "Broadcast dimension number (%lld) cannot be negative.", dimension_to_match); } if (dimension_to_match >= larger_shape.dimensions_size()) { return InvalidArgument( - "broadcast dimension number (%lld) too large; higher-rank " - "operand has rank %d", + "Broadcast dimension number (%lld) too large; higher-rank " + "operand has rank %d.", dimension_to_match, larger_shape.dimensions_size()); } int64 small_dimension_size = smaller_shape.dimensions(i); @@ -863,7 +861,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (small_dimension_size != large_dimension_size && small_dimension_size != 1 && large_dimension_size != 1) { return InvalidArgument( - "broadcast dimension %d mismatch: %lld != %lld; %s and %s", i, + "Broadcast dimension %d mismatch: %lld != %lld; %s and %s.", i, small_dimension_size, large_dimension_size, ShapeUtil::HumanString(smaller_shape).c_str(), ShapeUtil::HumanString(larger_shape).c_str()); @@ -872,7 +870,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // order. 
if (i > 0 && broadcast_dimensions.at(i - 1) >= dimension_to_match) { return InvalidArgument( - "broadcast dimensions order is wrong: %lld comes after %lld", + "Broadcast dimensions order is wrong: %lld comes after %lld.", dimension_to_match, broadcast_dimensions.at(i - 1)); } @@ -892,7 +890,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { return InvalidArgument( - "binary op %s with different element types: %s and %s", + "Binary op %s with different element types: %s and %s.", BinaryOperation_Name(operation).c_str(), ShapeUtil::HumanString(lhs).c_str(), ShapeUtil::HumanString(rhs).c_str()); @@ -904,8 +902,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!broadcast_dimensions.empty() && broadcast_dimensions != identity_dims) { return InvalidArgument( - "broadcast dimensions field must either be not set or be the " - "identity on binary operations with operands of the same rank"); + "Broadcast dimensions field must either be not set or be the " + "identity on binary operations with operands of the same rank."); } } @@ -979,8 +977,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( case BINOP_COMPLEX: { if (!ShapeUtil::ElementIsFloating(lhs)) { return InvalidArgument( - "expected element type in shape to be floating for complex compose " - "operation; got %s", + "Expected element type in shape to be floating for complex compose " + "operation; got %s.", PrimitiveType_Name(lhs.element_type()).c_str()); } TF_ASSIGN_OR_RETURN(const Shape& shape, @@ -989,7 +987,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (lhs.element_type() == F32 && rhs.element_type() == F32) { return ShapeUtil::ChangeElementType(shape, C64); } else { - return Unimplemented("complex component type not supported"); + return Unimplemented("Complex component type is not implemented."); } } case BINOP_AND: @@ -997,8 +995,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if 
(lhs.element_type() != PRED && !primitive_util::IsIntegralType(lhs.element_type())) { return InvalidArgument( - "expected pred or integral type in argument to and/or operation; " - "got %s", + "Expected pred or integral type in argument to and/or operation; " + "got %s.", PrimitiveType_Name(lhs.element_type()).c_str()); } return InferElementwiseBinaryOpShape(operation, lhs, rhs, @@ -1016,7 +1014,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } default: return Unimplemented( - "not yet implemented; infer binary op shape: %s; lhs: %s; rhs: %s", + "Binary op shape inference: %s; lhs: %s; rhs: %s is not implemented.", BinaryOperation_Name(operation).c_str(), lhs.ShortDebugString().c_str(), rhs.ShortDebugString().c_str()); } @@ -1041,7 +1039,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( case TRIOP_SELECT: return InferSelectShape(lhs, rhs, ehs); default: - return InvalidArgument("unknown operation %s", + return InvalidArgument("Unknown operation %s.", TernaryOperation_Name(operation).c_str()); } } @@ -1072,7 +1070,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return result; } default: - return InvalidArgument("unknown operation %s", + return InvalidArgument("Unknown operation %s.", VariadicOperation_Name(operation).c_str()); } } @@ -1082,7 +1080,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const ProgramShape& to_apply, tensorflow::gtl::ArraySlice dimensions) { if (arg_shapes.empty()) { - return InvalidArgument("Map expects at least one argument"); + return InvalidArgument("Map expects at least one argument."); } // All arguments must have the same shape. 
@@ -1113,7 +1111,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } return InvalidArgument( "Map operation requires all operands to have the same shape; got: " - "%s", + "%s.", Join(pieces, ", ").c_str()); } @@ -1122,7 +1120,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (dimensions.size() != arg_shape->dimensions_size()) { return InvalidArgument( "Map applied to a subset of dimensions currently not supported: " - "arg_dimension_size: %d, requested_map_dimensions_size: %zu", + "arg_dimension_size: %d, requested_map_dimensions_size: %zu.", arg_shape->dimensions_size(), dimensions.size()); } @@ -1130,7 +1128,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( for (int i = 0; i < dimensions.size(); ++i) { if (dimensions[i] != i) { return InvalidArgument( - "Map requires monotonically increasing dimension numbers, found: %s ", + "Map requires monotonically increasing dimension numbers; got: %s.", Join(dimensions, ", ").c_str()); } } @@ -1139,7 +1137,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (arg_shapes.size() != to_apply.parameters_size()) { return InvalidArgument( "Map applied function arity must match number of arguments; got: " - "arity: %d, arguments: %zu", + "arity: %d, arguments: %zu.", to_apply.parameters_size(), arg_shapes.size()); } @@ -1147,8 +1145,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const Shape& output_shape = to_apply.result(); if (!ShapeUtil::IsScalar(output_shape)) { return InvalidArgument( - "mapped computation's result has to be a scalar; " - "got: %s", + "Mapped computation's result has to be a scalar; got: %s.", ShapeUtil::HumanString(output_shape).c_str()); } @@ -1157,16 +1154,16 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::IsScalar(parameter_shape)) { return InvalidArgument( - "mapped computation's parameter has to be a scalar; " - "got parameter %d shape: %s", + "Mapped computation's parameter has to be a scalar; " + "got parameter %d 
shape: %s.", i, ShapeUtil::HumanString(parameter_shape).c_str()); } if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(parameter_shape, *arg_shape)) { return InvalidArgument( - "mapped computation's parameter type has to match argument element " - "type; got parameter %d shape: %s, argument shape: %s", + "Mapped computation's parameter type has to match argument element " + "type; got parameter %d shape: %s, argument shape: %s.", i, ShapeUtil::HumanString(parameter_shape).c_str(), ShapeUtil::HumanString(*arg_shape).c_str()); } @@ -1197,21 +1194,21 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Expected feature_index of batch-norm-training to be " "smaller than the rank of operand_shape; " - "got feature_index %lld, and rank %lld", + "got feature_index %lld, and rank %lld.", feature_index, ShapeUtil::Rank(operand_shape)); } if (feature_index < 0) { return InvalidArgument( "Expected feature_index of batch-norm-training to " - "be a non-negative number, got %lld", + "be a non-negative number, got %lld.", feature_index); } if (ShapeUtil::Rank(operand_shape) < 1) { return InvalidArgument( "Expected the rank of operand to " - "batch-norm-training to be at least 1; got %lld", + "batch-norm-training to be at least 1; got %lld.", ShapeUtil::Rank(operand_shape)); } @@ -1232,7 +1229,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::ElementIsFloating(operand_shape)) { return InvalidArgument( "The operand to batch-norm-training must have a floating point " - "element type, but the shape is %s", + "element type, but the shape is %s.", PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1241,7 +1238,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-training, " "but the shape of offset factor is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", 
PrimitiveType_Name(offset_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1251,7 +1248,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-training, " "but the shape of scale factor is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", PrimitiveType_Name(scale_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1264,7 +1261,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of offset factor should be the same as feature count," "but the size of offset factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(offset_shape, 0), feature_count); } @@ -1272,7 +1269,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of scale factor should be the same as feature count," "but the size of scale factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(scale_shape, 0), feature_count); } @@ -1307,21 +1304,21 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Expected feature_index of batch-norm-inference to be " "smaller than the rank of operand_shape; " - "got feature_index %lld, and rank %lld", + "got feature_index %lld, and rank %lld.", feature_index, ShapeUtil::Rank(operand_shape)); } if (feature_index < 0) { return InvalidArgument( "Expected feature_index of batch-norm-inference to " - "be a non-negative number, got %lld", + "be a non-negative number, got %lld.", feature_index); } if (ShapeUtil::Rank(operand_shape) < 1) { return InvalidArgument( "Expected the rank of operand to " - "batch-norm-inference to be at least 1; got %lld", + "batch-norm-inference to be at least 1; got %lld.", ShapeUtil::Rank(operand_shape)); } @@ -1342,7 +1339,7 
@@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::ElementIsFloating(operand_shape)) { return InvalidArgument( "The operand to batch-norm-inference must have a floating point " - "element type, but the shape is %s", + "element type, but the shape is %s.", PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1352,7 +1349,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "The inputs should have the same element type for " "batch-norm-inference, " "but the shape of offset factor is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", PrimitiveType_Name(offset_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1363,7 +1360,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "The inputs should have the same element type for " "batch-norm-inference, " "but the shape of scale factor is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", PrimitiveType_Name(scale_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1374,7 +1371,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "The inputs should have the same element type for " "batch-norm-inference, " "but the shape of mean is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", PrimitiveType_Name(mean_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1385,7 +1382,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "The inputs should have the same element type for " "batch-norm-inference, " "but the shape of variance is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", PrimitiveType_Name(mean_shape.element_type()).c_str(), PrimitiveType_Name(variance_shape.element_type()).c_str()); } @@ -1398,7 +1395,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of offset factor 
should be the same as feature count," "but the size of offset factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(offset_shape, 0), feature_count); } @@ -1406,7 +1403,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of scale factor should be the same as feature count," "but the size of scale factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(scale_shape, 0), feature_count); } @@ -1414,7 +1411,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of mean should be the same as feature count," "but the size of mean is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(mean_shape, 0), feature_count); } @@ -1422,7 +1419,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of variance should be the same as feature count," "but the size of variance is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(variance_shape, 0), feature_count); } @@ -1455,7 +1452,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Expected feature_index of batch-norm-grad to be " "smaller than the rank of operand_shape; " - "got feature_index %lld, and rank %lld", + "got feature_index %lld, and rank %lld.", feature_index, ShapeUtil::Rank(operand_shape)); } @@ -1463,7 +1460,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Expected operand_shape of batch-norm-grad to have the same rank as" " output_grad_shape; got rank(oprand_shape) %lld, and" - " rank(output_grad_shape) %lld", + " rank(output_grad_shape) %lld.", ShapeUtil::Rank(operand_shape), ShapeUtil::Rank(output_grad_shape)); } @@ -1491,14 +1488,14 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if 
(!ShapeUtil::ElementIsFloating(operand_shape)) { return InvalidArgument( "The operand to batch-norm-grad must have a floating point " - "element type, but the shape is %s", + "element type, but the shape is %s.", PrimitiveType_Name(operand_shape.element_type()).c_str()); } if (!ShapeUtil::ElementIsFloating(output_grad_shape)) { return InvalidArgument( "The output_grad to batch-norm-grad must have a floating point " - "element type, but the shape is %s", + "element type, but the shape is %s.", PrimitiveType_Name(output_grad_shape.element_type()).c_str()); } @@ -1507,7 +1504,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but the element type of output_grad is %s " - "and the element type of operand is %s", + "and the element type of operand is %s.", PrimitiveType_Name(output_grad_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1517,7 +1514,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but the element type of scale factor is %s " - "and the element type of operand is %s", + "and the element type of operand is %s.", PrimitiveType_Name(scale_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1527,7 +1524,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but the element type of mean is %s " - "and the element type of operand is %s", + "and the element type of operand is %s.", PrimitiveType_Name(mean_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1537,7 +1534,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but 
the element type of mean is %s " - "and the element type of operand is %s", + "and the element type of operand is %s.", PrimitiveType_Name(mean_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1551,7 +1548,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of mean should be the same as feature count," "but the size of offset factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(mean_shape, 0), feature_count); } @@ -1559,7 +1556,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of scale factor should be the same as feature count," "but the size of scale factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(scale_shape, 0), feature_count); } @@ -1567,7 +1564,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of variance should be the same as feature count," "but the size of variance is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(var_shape, 0), feature_count); } @@ -1578,7 +1575,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The bounds of operand shape should be the same as output_grad's," "but the bound of operand_shape at dimension %lld is %lld " - "and the bound of output_grad_shape is %lld", + "and the bound of output_grad_shape is %lld.", i, ShapeUtil::GetDimension(operand_shape, i), ShapeUtil::GetDimension(output_grad_shape, i)); } @@ -1596,7 +1593,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { return InvalidArgument( - "Convolution with different element types: %s and %s", + "Convolution with different element types: %s and %s.", ShapeUtil::HumanString(lhs).c_str(), 
ShapeUtil::HumanString(rhs).c_str()); } @@ -1612,21 +1609,19 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (window.dimensions_size() != num_spatial_dims) { return InvalidArgument( "Window must have same number of dimensions as dimension numbers.\n" - "Window: %s\nDimension numbers: %s", + "Window: %s\nDimension numbers: %s.", window.DebugString().c_str(), dnums.DebugString().c_str()); } const int num_dims = num_spatial_dims + 2; if (ShapeUtil::Rank(lhs) != num_dims) { return InvalidArgument( - "The LHS argument to a convolution should have rank %d.\n" - "lhs: %s", + "The LHS argument to a convolution should have rank %d; lhs: %s.", num_dims, ShapeUtil::HumanString(lhs).c_str()); } if (ShapeUtil::Rank(rhs) != num_dims) { return InvalidArgument( - "The RHS argument to a convolution should have rank %d.\n" - "lhs: %s", + "The RHS argument to a convolution should have rank %d; lhs: %s.", num_dims, ShapeUtil::HumanString(lhs).c_str()); } TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(lhs)); @@ -1663,26 +1658,26 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( !std::all_of(window_dnums.begin(), window_dnums.end(), in_range) || !std::all_of(output_dnums.begin(), output_dnums.end(), in_range)) { return InvalidArgument( - "A dimension number is out of range in convolution: %s", + "A dimension number is out of range in convolution: %s.", dnums.DebugString().c_str()); } if (input_dnums != expected_dnums) { return InvalidArgument( "Input dimensions of convolution must contain each dimension exactly " - "once: %s", + "once: %s.", dnums.DebugString().c_str()); } if (window_dnums != expected_dnums) { return InvalidArgument( "Window dimensions of convolution must contain each dimension exactly " - "once: %s", + "once: %s.", dnums.DebugString().c_str()); } if (output_dnums != expected_dnums) { return InvalidArgument( "Output dimensions of convolution must contain each dimension exactly " - "once: %s", + "once: %s.", dnums.DebugString().c_str()); } 
@@ -1706,7 +1701,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Expected LHS feature dimension (value %lld) to match RHS " "input feature dimension (value %lld); got (%s, %s)\n" - "Dimension numbers: {%s}", + "Dimension numbers: {%s}.", input_features, kernel_input_features, ShapeUtil::HumanString(lhs).c_str(), ShapeUtil::HumanString(rhs).c_str(), dnums.DebugString().c_str()); @@ -1720,7 +1715,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "Window dimensions do not match RHS shape:\n\t" "RHS shape: %s\n\t" "Window: {%s}\n\t" - "Dimension numbers: {%s}", + "Dimension numbers: {%s}.", ShapeUtil::HumanString(rhs).c_str(), window.ShortDebugString().c_str(), dnums.ShortDebugString().c_str()); } @@ -1748,8 +1743,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const tensorflow::gtl::ArraySlice fft_length) { const int64 fft_rank = fft_length.size(); if (fft_rank < 1 || fft_rank > 3) { - return InvalidArgument("FFT only supports ranks 1-3, but got %lld", - fft_rank); + return InvalidArgument("FFT only supports ranks 1-3; got %lld.", fft_rank); } #define RET_CHECK_RANK(x) \ if (x.dimensions_size() < fft_rank) { \ @@ -1762,7 +1756,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( case FFT: case IFFT: if (in.element_type() != C64) { - return InvalidArgument("%s requires C64 input type, found %s", + return InvalidArgument("%s requires C64 input type, found %s.", FftType_Name(fft_type).c_str(), PrimitiveType_Name(in.element_type()).c_str()); } @@ -1770,7 +1764,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return in; case RFFT: { if (in.element_type() != F32) { - return InvalidArgument("RFFT requires F32 input type, found %s", + return InvalidArgument("RFFT requires F32 input type, found %s.", PrimitiveType_Name(in.element_type()).c_str()); } RET_CHECK_RANK(in); @@ -1779,7 +1773,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( fft_length[i]) { return InvalidArgument( "RFFT 
requires innermost dimensions match fft_length but " - "dimension %lld is %lld and should be %lld", + "dimension %lld is %lld and should be %lld.", in.dimensions_size() - fft_rank + i, in.dimensions(in.dimensions_size() - fft_rank + i), fft_length[i]); @@ -1792,7 +1786,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } case IRFFT: { if (in.element_type() != C64) { - return InvalidArgument("IRFFT requires C64 input type, found %s", + return InvalidArgument("IRFFT requires C64 input type, found %s.", PrimitiveType_Name(in.element_type()).c_str()); } RET_CHECK_RANK(in); @@ -1802,7 +1796,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( fft_length[i]) { return InvalidArgument( "IRFFT requires all but one innermost dimensions match " - "fft_length, but dimension %lld is %lld and should be %lld", + "fft_length, but dimension %lld is %lld and should be %lld.", in.dimensions_size() - fft_rank + i, in.dimensions(in.dimensions_size() - fft_rank + i), fft_length[i]); @@ -1812,7 +1806,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( fft_length[fft_rank - 1] / 2 + 1) { return InvalidArgument( "IRFFT requires innermost dimension matches fft_length/2+1, but " - "dimension %d is %lld and should be %lld", + "dimension %d is %lld and should be %lld.", in.dimensions_size() - 1, in.dimensions(in.dimensions_size() - 1), fft_length[fft_rank - 1] / 2 + 1); } @@ -1850,8 +1844,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( for (int64 dimension : dimensions_to_reduce) { if (dimension >= ShapeUtil::Rank(arg) || dimension < 0) { return InvalidArgument( - "attempting to reduce out-of-bounds dimension %lld in shape %s", - dimension, ShapeUtil::HumanString(arg).c_str()); + "Reducing out-of-bounds dimension %lld in shape %s.", dimension, + ShapeUtil::HumanString(arg).c_str()); } } TF_RETURN_IF_ERROR( @@ -1891,30 +1885,30 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // Check if the select function has a proper shape of (T,T) -> PRED. 
if (select_shape.parameters_size() != 2) { return InvalidArgument( - "select function must take 2 parameters, but " + "Select function must take 2 parameters, but " "takes %d parameter(s).", select_shape.parameters_size()); } const Shape& select_result_shape = select_shape.result(); if (!ShapeUtil::Compatible(select_result_shape, ShapeUtil::MakeShape(PRED, {}))) { - return Unimplemented("select function must have rank-0 PRED result."); + return InvalidArgument("Select function must have rank-0 PRED result."); } const Shape& operand_element_shape = ShapeUtil::MakeShape(operand_shape.element_type(), {}); if (!ShapeUtil::CompatibleIgnoringFpPrecision(operand_element_shape, select_shape.parameters(0))) { return InvalidArgument( - "select function's first parameter shape currently must " - "match the operand element shape. Got %s vs %s", + "Select function's first parameter shape currently must " + "match the operand element shape, but got %s vs %s.", ShapeUtil::HumanString(select_shape.parameters(0)).c_str(), ShapeUtil::HumanString(operand_element_shape).c_str()); } if (!ShapeUtil::CompatibleIgnoringFpPrecision(operand_element_shape, select_shape.parameters(1))) { return InvalidArgument( - "select function's second parameter shape currently must " - "match the operand element shape. 
Got %s vs %s", + "Select function's second parameter shape currently must " + "match the operand element shape, but got %s vs %s.", ShapeUtil::HumanString(select_shape.parameters(1)).c_str(), ShapeUtil::HumanString(operand_element_shape).c_str()); } @@ -1931,8 +1925,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::CompatibleIgnoringFpPrecision(source_shape, window_result_shape)) { return InvalidArgument( - "source shape does not match the shape of window-reduced operand: " - "source(%s), window-reduced operand(%s)", + "Source shape does not match the shape of window-reduced operand: " + "source(%s), window-reduced operand(%s).", ShapeUtil::HumanString(source_shape).c_str(), ShapeUtil::HumanString(window_result_shape).c_str()); } @@ -1946,7 +1940,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( auto error = [&](const string& message) { return InvalidArgument( "%s in slice operation; argument shape: %s; starts: {%s}; limits: " - "{%s}; strides: {%s}", + "{%s}; strides: {%s}.", message.c_str(), ShapeUtil::HumanString(arg).c_str(), Join(starts, ",").c_str(), Join(limits, ",").c_str(), Join(strides, ",").c_str()); @@ -1969,7 +1963,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (starts.size() != ShapeUtil::Rank(arg)) { return InvalidArgument( - "slice index count does not match argument rank: %zu vs %lld", + "Slice index count does not match argument rank: %zu vs %lld.", starts.size(), ShapeUtil::Rank(arg)); } @@ -1979,7 +1973,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( int64 limit_index = limits[dimension]; int64 stride = strides[dimension]; if (start_index < 0) { - return InvalidArgument("negative start index to slice: %lld", + return InvalidArgument("Negative start index to slice: %lld.", start_index); } if (limit_index > arg.dimensions(dimension)) { @@ -1999,7 +1993,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( limit_index, start_index)); } if (stride <= 0) { - return 
InvalidArgument("stride (%lld) must be positive", stride); + return InvalidArgument("Stride (%lld) must be positive.", stride); } sizes.push_back((limit_index - start_index + stride - 1) / stride); } @@ -2023,20 +2017,20 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (ShapeUtil::Rank(start_indices_shape) != 1) { return InvalidArgument( - "dynamic slice start indices of rank %lld must be rank1.", + "Dynamic slice start indices of rank %lld must be rank1.", ShapeUtil::Rank(start_indices_shape)); } if (!ShapeUtil::ElementIsIntegral(start_indices_shape)) { return InvalidArgument( - "dynamic slice start indices must be of integral type."); + "Dynamic slice start indices must be of integral type."); } const int64 start_num_dims = start_indices_shape.dimensions(0); if (ShapeUtil::Rank(operand_shape) != start_num_dims) { return InvalidArgument( - "dynamic slice start number of dimensions %lld (%s) must match rank " - "%lld of slice input (%s)", + "Dynamic slice start number of dimensions %lld (%s) must match rank " + "%lld of slice input (%s).", start_num_dims, ShapeUtil::HumanString(start_indices_shape).c_str(), ShapeUtil::Rank(operand_shape), ShapeUtil::HumanString(operand_shape).c_str()); @@ -2044,7 +2038,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (slice_sizes.size() != ShapeUtil::Rank(operand_shape)) { return InvalidArgument( - "dynamic slice index count does not match argument rank: %zu vs %lld", + "Dynamic slice index count does not match argument rank: %zu vs %lld.", slice_sizes.size(), ShapeUtil::Rank(operand_shape)); } @@ -2052,12 +2046,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const int64 input_dim_size = operand_shape.dimensions(dim); const int64 slice_dim_size = slice_sizes[dim]; if (slice_dim_size < 0) { - return InvalidArgument("negative size index to dynamic slice: %lld", + return InvalidArgument("Negative size index to dynamic slice: %lld.", slice_dim_size); } if (slice_dim_size > input_dim_size) { 
return InvalidArgument( - "slice dim size %lld greater than dynamic slice dimension: %lld", + "Slice dim size %lld greater than dynamic slice dimension: %lld.", slice_dim_size, input_dim_size); } VLOG(2) << tensorflow::strings::Printf("slice_sizes[%lld] = %lld", dim, @@ -2086,20 +2080,20 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (ShapeUtil::Rank(start_indices_shape) != 1) { return InvalidArgument( - "dynamic update slice start indices of rank %lld must be rank1.", + "Dynamic update slice start indices of rank %lld must be rank1.", ShapeUtil::Rank(start_indices_shape)); } if (!ShapeUtil::ElementIsIntegral(start_indices_shape)) { return InvalidArgument( - "dynamic update slice start indices must be of integral type."); + "Dynamic update slice start indices must be of integral type."); } const int64 start_num_dims = start_indices_shape.dimensions(0); if (ShapeUtil::Rank(operand_shape) != start_num_dims) { return InvalidArgument( - "dynamic slice start number of dimensions %lld (%s) must match rank " - "%lld of slice input (%s)", + "Dynamic slice start number of dimensions %lld (%s) must match rank " + "%lld of slice input (%s).", start_num_dims, ShapeUtil::HumanString(start_indices_shape).c_str(), ShapeUtil::Rank(operand_shape), ShapeUtil::HumanString(operand_shape).c_str()); @@ -2107,16 +2101,16 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (ShapeUtil::Rank(update_shape) != ShapeUtil::Rank(operand_shape)) { return InvalidArgument( - "dynamic update slice update rank does not match argument rank: " - "%lld vs %lld", + "Dynamic update slice update rank does not match argument rank: " + "%lld vs %lld.", ShapeUtil::Rank(update_shape), ShapeUtil::Rank(operand_shape)); } if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(operand_shape, update_shape)) { return InvalidArgument( - "dynamic update slice update element type does not match argument. 
" - "operand.element_type: %s vs update.element_type: %s", + "Dynamic update slice update element type does not match argument. " + "operand.element_type: %s vs update.element_type: %s.", PrimitiveType_Name(operand_shape.element_type()).c_str(), PrimitiveType_Name(update_shape.element_type()).c_str()); } @@ -2126,12 +2120,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const int64 update_dim_size = update_shape.dimensions(dim); if (update_dim_size < 0) { return InvalidArgument( - "size index %lld to dynamic update slice must be >= 0", + "Size index %lld to dynamic update slice must be >= 0.", update_dim_size); } if (update_dim_size > input_dim_size) { return InvalidArgument( - "update dim size %lld greater than dynamic slice dimension: %lld", + "Update dim size %lld greater than dynamic slice dimension: %lld.", update_dim_size, input_dim_size); } VLOG(2) << tensorflow::strings::Printf("update_sizes[%lld] = %lld", dim, @@ -2151,7 +2145,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( for (int64 dimension : dimensions) { if (dimension >= ShapeUtil::Rank(operand_shape) || dimension < 0) { return InvalidArgument( - "one of the reverse dimensions (%lld) is out-of-bounds in shape %s", + "One of the reverse dimensions (%lld) is out-of-bounds in shape %s.", dimension, ShapeUtil::HumanString(operand_shape).c_str()); } } @@ -2162,14 +2156,14 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const Shape& arg, int64 index) { if (!ShapeUtil::IsTuple(arg)) { return InvalidArgument( - "cannot infer shape: attempting to index into non-tuple: %s", + "Cannot infer shape: attempting to index into non-tuple: %s.", ShapeUtil::HumanString(arg).c_str()); } if (index >= arg.tuple_shapes_size()) { return InvalidArgument( - "cannot infer shape: attempt to index out of tuple bounds: %lld " - ">= %d in shape %s", + "Cannot infer shape: attempt to index out of tuple bounds: %lld " + ">= %d in shape %s.", index, arg.tuple_shapes_size(), 
ShapeUtil::HumanString(arg).c_str()); } @@ -2181,17 +2175,17 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const Shape& init) { // Check the number of parameters for given computations. if (condition.parameters_size() != 1) { - return InvalidArgument("condition must take 1 arguments; got %d", + return InvalidArgument("Condition must take 1 arguments; got %d.", condition.parameters_size()); } if (body.parameters_size() != 1) { - return InvalidArgument("body must take 1 arguments; got %d", + return InvalidArgument("Body must take 1 arguments; got %d.", body.parameters_size()); } auto shape_string = [&]() { return tensorflow::strings::Printf( - "condition: %s; body: %s; init: %s", + "Condition: %s; body: %s; init: %s.", ShapeUtil::HumanString(condition).c_str(), ShapeUtil::HumanString(body).c_str(), ShapeUtil::HumanString(init).c_str()); @@ -2199,15 +2193,15 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // Check the shapes of computation parameters and return types. 
if (!ShapeUtil::ShapeIs(condition.result(), PRED, {})) { - return InvalidArgument("condition must return a boolean; got %s", + return InvalidArgument("Condition must return a boolean; got %s.", shape_string().c_str()); } if (!ShapeUtil::Compatible(body.result(), condition.parameters(0)) || !ShapeUtil::Compatible(body.result(), body.parameters(0)) || !ShapeUtil::Compatible(body.result(), init)) { return InvalidArgument( - "the parameter of condition and body, the result of the body, and init " - "must all have the same shape; got %s", + "The parameter of condition and body, the result of the body, and init " + "must all have the same shape; got %s.", shape_string().c_str()); } @@ -2219,7 +2213,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const Shape& false_operand, const ProgramShape& true_computation, const ProgramShape& false_computation) { if (!ShapeUtil::ShapeIs(predicate, PRED, {})) { - return InvalidArgument("predicate must be a boolean; got %s.", + return InvalidArgument("Predicate must be a boolean; got %s.", ShapeUtil::HumanString(predicate).c_str()); } @@ -2302,8 +2296,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (ShapeUtil::ElementsIn(operand) != ShapeUtil::ElementsIn(inferred_shape)) { return InvalidArgument( - "reshape operation has mismatched element counts: from=%lld (%s) " - "to=%lld (%s)", + "Reshape operation has mismatched element counts: from=%lld (%s) " + "to=%lld (%s).", ShapeUtil::ElementsIn(operand), ShapeUtil::HumanString(operand).c_str(), ShapeUtil::ElementsIn(inferred_shape), ShapeUtil::HumanString(inferred_shape).c_str()); @@ -2351,7 +2345,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(max, "clamp max")); if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(min, operand) || !ShapeUtil::SameElementTypeIgnoringFpPrecision(max, operand)) { - return InvalidArgument("clamp op with different operand types: %s, %s, %s", + return InvalidArgument("Clamp with 
different operand types: %s, %s, %s.", ShapeUtil::HumanString(min).c_str(), ShapeUtil::HumanString(operand).c_str(), ShapeUtil::HumanString(max).c_str()); @@ -2372,7 +2366,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } } return Unimplemented( - "not yet implemented: %s, %s %s", min.ShortDebugString().c_str(), + "%s, %s %s is not implemented.", min.ShortDebugString().c_str(), max.ShortDebugString().c_str(), operand.ShortDebugString().c_str()); } @@ -2391,13 +2385,13 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } if (!compatible) { return InvalidArgument( - "operands to select must be the same shape; got %s and %s", + "Operands to select must be the same shape; got %s and %s.", ShapeUtil::HumanString(on_true).c_str(), ShapeUtil::HumanString(on_false).c_str()); } if (pred.element_type() != PRED) { return InvalidArgument( - "select's pred operand must have PRED element type; got %s", + "Select's pred operand must have PRED element type; got %s.", ShapeUtil::HumanString(pred).c_str()); } if (ShapeUtil::SameDimensions(pred, on_true) || ShapeUtil::Rank(pred) == 0) { @@ -2407,9 +2401,9 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return ShapeUtil::ChangeElementType( on_true, ShapeUtil::HigherPrecisionElementType(on_true, on_false)); } else { - return Unimplemented( - "select operation with non-scalar predicate with dimensionality " - " different from the other operands: %s", + return InvalidArgument( + "Select operation with non-scalar predicate with dimensionality " + " different from the other operands: %s.", ShapeUtil::HumanString(pred).c_str()); } } @@ -2427,7 +2421,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Call applied function arity must match number of arguments; got: " "arity: %d, arguments: %zu; computation signature: %s; argument " - "shapes: [%s]", + "shapes: [%s].", to_apply.parameters_size(), arg_shapes.size(), computation_signature.c_str(), argument_shapes.c_str()); } @@ 
-2439,7 +2433,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::Compatible(arg_shape, param_shape)) { return InvalidArgument( "Call parameter must match argument; got parameter %d shape: %s, " - "argument shape: %s", + "argument shape: %s.", i, ShapeUtil::HumanString(param_shape).c_str(), ShapeUtil::HumanString(arg_shape).c_str()); } @@ -2454,14 +2448,14 @@ static Status ValidateGatherDimensionNumbers( const GatherDimensionNumbers& dim_numbers) { if (!c_is_sorted(dim_numbers.output_window_dims())) { return InvalidArgument( - "Output window dimensions in gather op must be ascending; got: %s", + "Output window dimensions in gather op must be ascending; got: %s.", Join(dim_numbers.output_window_dims(), ", ").c_str()); } if (c_adjacent_find(dim_numbers.output_window_dims()) != dim_numbers.output_window_dims().end()) { return InvalidArgument( - "Output window dimensions in gather op must not repeat; got: %s", + "Output window dimensions in gather op must not repeat; got: %s.", Join(dim_numbers.output_window_dims(), ", ").c_str()); } @@ -2474,7 +2468,7 @@ static Status ValidateGatherDimensionNumbers( if (window_index < 0 || window_index >= output_shape_rank) { return InvalidArgument( "Window index %d in gather op is out of bounds; got %lld, but should " - "have been in [0,%lld)", + "have been in [0,%lld).", i, window_index, output_shape_rank); } } @@ -2496,7 +2490,7 @@ static Status ValidateGatherDimensionNumbers( gather_dim_to_input_dim >= input_shape.dimensions_size()) { return InvalidArgument( "Invalid gather_dims_to_operand_dims mapping; domain is [0, %d), " - "got: %d->%lld", + "got: %d->%lld.", input_shape.dimensions_size(), i, gather_dim_to_input_dim); } } @@ -2511,7 +2505,7 @@ static Status ValidateGatherDimensionNumbers( sorted_gather_dims_to_operand_dims.end()) { return InvalidArgument( "Repeated dimensions are not allowed in gather_dims_to_operand_dims; " - "got: %s", + "got: %s.", Join(dim_numbers.gather_dims_to_operand_dims(), ", 
").c_str()); } @@ -2519,7 +2513,7 @@ static Status ValidateGatherDimensionNumbers( if (elided_dim < 0 || elided_dim >= input_shape.dimensions_size()) { return InvalidArgument( "Invalid elided_window_dims set in gather op; valid range is [0, " - "%d), got: %lld", + "%d), got: %lld.", input_shape.dimensions_size(), elided_dim); } } @@ -2534,7 +2528,7 @@ static Status ValidateGatherDimensionNumbers( dim_numbers.elided_window_dims().end()) { return InvalidArgument( "Repeated dimensions not allowed in elided_window_dims in gather op; " - "got: %s", + "got: %s.", Join(dim_numbers.elided_window_dims(), ", ").c_str()); } @@ -2552,7 +2546,7 @@ static Status ValidateGatherDimensionNumbers( if (!ShapeUtil::ElementIsIntegral(gather_indices_shape)) { return InvalidArgument( - "Gather indices parameter must be an integral tensor; got %s", + "Gather indices parameter must be an integral tensor; got %s.", ShapeUtil::HumanString(gather_indices_shape).c_str()); } @@ -2586,7 +2580,7 @@ static Status ValidateGatherDimensionNumbers( if (window_bounds.size() != input_shape.dimensions_size()) { return InvalidArgument( "Gather op must have one window bound for every input dimension; got: " - "len(window_bounds)=%lu, input_shape.rank=%d", + "len(window_bounds)=%lu, input_shape.rank=%d.", window_bounds.size(), input_shape.dimensions_size()); } @@ -2596,7 +2590,7 @@ static Status ValidateGatherDimensionNumbers( return InvalidArgument( "All components of the window index in a gather op must either be a " "output window index or explicitly elided; got len(window_bounds)=%lu, " - "output_window_bounds=%s, elided_window_bounds=%s", + "output_window_bounds=%s, elided_window_bounds=%s.", window_bounds.size(), Join(gather_dim_numbers.output_window_dims(), ",").c_str(), Join(gather_dim_numbers.elided_window_dims(), ",").c_str()); @@ -2609,7 +2603,7 @@ static Status ValidateGatherDimensionNumbers( return InvalidArgument( "Window bound at index %d in gather op is out of range, must be " "within " - 
"[0, %lld), got %lld", + "[0, %lld), got %lld.", i, corresponding_input_bound + 1, window_bound); } } @@ -2618,7 +2612,7 @@ static Status ValidateGatherDimensionNumbers( if (window_bounds[gather_dim_numbers.elided_window_dims(i)] != 1) { return InvalidArgument( "Gather op can only elide window indices with bound 1, but bound is " - "%lld for index %lld at position %d", + "%lld for index %lld at position %d.", window_bounds[gather_dim_numbers.elided_window_dims(i)], gather_dim_numbers.elided_window_dims(i), i); } diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index 029d2b3b86..0e61994a78 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -135,7 +135,7 @@ TEST_F(ShapeInferenceTest, SelectBadShapes) { TernaryOperation::TRIOP_SELECT, pred_, matrix_64_48_, matrix_32_64_); ASSERT_FALSE(inferred_status_error1.ok()); ASSERT_THAT(inferred_status_error1.status().error_message(), - HasSubstr("operands to select must be the same shape")); + HasSubstr("Operands to select must be the same shape")); auto inferred_status_error2 = ShapeInference::InferTernaryOpShape( TernaryOperation::TRIOP_SELECT, s32_, matrix_64_48_, matrix_64_48_); @@ -340,7 +340,7 @@ TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSourceShape) { init_value_shape_, scatter_program_shape_); ASSERT_FALSE(inferred_status_fail.ok()); ASSERT_THAT(inferred_status_fail.status().error_message(), - HasSubstr("source shape does not match")); + HasSubstr("Source shape does not match")); } TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape1) { @@ -351,7 +351,7 @@ TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape1) { init_value_shape_, scatter_program_shape_); ASSERT_FALSE(inferred_status_fail.ok()); ASSERT_THAT(inferred_status_fail.status().error_message(), - HasSubstr("select function must take 2 
parameters")); + HasSubstr("Select function must take 2 parameters")); } TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape2) { @@ -362,7 +362,7 @@ TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape2) { init_value_shape_, scatter_program_shape_); ASSERT_FALSE(inferred_status_fail.ok()); ASSERT_THAT(inferred_status_fail.status().error_message(), - HasSubstr("select function must have rank-0 PRED")); + HasSubstr("Select function must have rank-0 PRED")); } TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape3) { @@ -373,7 +373,7 @@ TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape3) { init_value_shape_, scatter_program_shape_); ASSERT_FALSE(inferred_status_fail.ok()); ASSERT_THAT(inferred_status_fail.status().error_message(), - HasSubstr("select function's first parameter")); + HasSubstr("Select function's first parameter")); } TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape4) { @@ -384,7 +384,7 @@ TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape4) { init_value_shape_, scatter_program_shape_); ASSERT_FALSE(inferred_status_fail.ok()); ASSERT_THAT(inferred_status_fail.status().error_message(), - HasSubstr("select function's second parameter")); + HasSubstr("Select function's second parameter")); } TEST_F(ShapeInferenceTest, Convolve) { @@ -906,7 +906,7 @@ TEST_F(ShapeInferenceTest, ScalarDotVector) { ShapeInference::InferDotOpShape(f32_, vector_32_, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("dot only supports rank")); + HasSubstr("Dot only supports rank")); } // 3D 2D: error @@ -918,7 +918,7 @@ TEST_F(ShapeInferenceTest, DotWithRankHigherThanTwo) { ShapeUtil::MakeShape(F32, {32, 32, 32}), matrix_32_64_, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("batch and contracting dimension 
number mismatch")); + HasSubstr("Batch and contracting dimension number mismatch")); } // vector vector -> scalar @@ -1024,7 +1024,7 @@ TEST_F(ShapeInferenceTest, DotWithTwoContractingDimsFails) { ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("must specify one contracting dimension for both " + HasSubstr("Must specify one contracting dimension for both " "lhs and rhs")); } @@ -1044,7 +1044,7 @@ TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimSizesFails) { ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("batch dimension numbers and sizes must match")); + HasSubstr("Batch dimension numbers and sizes must match")); } // BatchMatMul with different batch dimension numbers fails. @@ -1063,7 +1063,7 @@ TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimNumbersFails) { ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("batch dimension numbers must precede non-batch")); + HasSubstr("Batch dimension numbers must precede non-batch")); } // BatchMatMul with out-of-range dimension numbers fails. 
@@ -1166,42 +1166,42 @@ TEST_F(ShapeInferenceTest, BinOpBroadcastBadDimension) { BinaryOperation::BINOP_ADD, tensor, vec8, {}); ASSERT_FALSE(inferred_status_error1.ok()); ASSERT_THAT(inferred_status_error1.status().error_message(), - HasSubstr("automatic")); + HasSubstr("Automatic")); // broadcast_dimension out of bounds for tensor's rank auto inferred_status_error2 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor, vec8, {3}); ASSERT_FALSE(inferred_status_error2.ok()); ASSERT_THAT(inferred_status_error2.status().error_message(), - ContainsRegex("broadcast dimension number .* too large")); + ContainsRegex("Broadcast dimension number .* too large")); // broadcast_dimension doesn't match corresponding dimension auto inferred_status_error3 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor, vec8, {0}); ASSERT_FALSE(inferred_status_error3.ok()); ASSERT_THAT(inferred_status_error3.status().error_message(), - HasSubstr("broadcast dimension 0 mismatch")); + HasSubstr("Broadcast dimension 0 mismatch")); // broadcast_dimensions list too long auto inferred_status_error4 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor, matrix8_4, {0, 1, 2}); ASSERT_FALSE(inferred_status_error4.ok()); ASSERT_THAT(inferred_status_error4.status().error_message(), - HasSubstr("size of broadcast_dimensions has to match")); + HasSubstr("broadcast_dimensions has to match")); // there's a dimension above the rank of the tensor auto inferred_status_error5 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor, matrix8_4, {3, 0}); ASSERT_FALSE(inferred_status_error5.ok()); ASSERT_THAT(inferred_status_error5.status().error_message(), - ContainsRegex("broadcast dimension number .* too large")); + ContainsRegex("dimension number .* too large")); // broadcasting dimensions don't match in this order auto inferred_status_error6 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor, matrix8_4, {2, 1}); 
ASSERT_FALSE(inferred_status_error6.ok()); ASSERT_THAT(inferred_status_error6.status().error_message(), - HasSubstr("broadcast dimension 0 mismatch")); + HasSubstr("dimension 0 mismatch")); // The following two tests make sure that broadcasting dimensions are listed // in a proper (strictly increasing) order, even if the lower-rank array @@ -1210,13 +1210,13 @@ TEST_F(ShapeInferenceTest, BinOpBroadcastBadDimension) { BinaryOperation::BINOP_ADD, tensor8_8_8, matrix8_8, {0, 0}); ASSERT_FALSE(inferred_status_error7.ok()); ASSERT_THAT(inferred_status_error7.status().error_message(), - HasSubstr("broadcast dimensions order is wrong")); + HasSubstr("dimensions order is wrong")); auto inferred_status_error8 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor8_8_8, matrix8_8, {1, 0}); ASSERT_FALSE(inferred_status_error8.ok()); ASSERT_THAT(inferred_status_error8.status().error_message(), - HasSubstr("broadcast dimensions order is wrong")); + HasSubstr("dimensions order is wrong")); } // Tests for the while instruction with proper shapes. 
@@ -1242,7 +1242,7 @@ TEST_F(ShapeInferenceTest, WhileWithBadShapes) { ShapeInference::InferWhileShape(bad_shape_1, body, result_shape); ASSERT_FALSE(inferred_status_error1.ok()); ASSERT_THAT(inferred_status_error1.status().error_message(), - HasSubstr("condition must take 1 arguments")); + HasSubstr("Condition must take 1 arguments")); auto bad_shape_2 = ShapeUtil::MakeProgramShape({s32_, result_shape}, result_shape); @@ -1250,14 +1250,14 @@ TEST_F(ShapeInferenceTest, WhileWithBadShapes) { ShapeInference::InferWhileShape(cond, bad_shape_2, result_shape); ASSERT_FALSE(inferred_status_error2.ok()); ASSERT_THAT(inferred_status_error2.status().error_message(), - HasSubstr("body must take 1 arguments")); + HasSubstr("Body must take 1 arguments")); auto bad_shape_3 = ShapeUtil::MakeProgramShape({result_shape}, s32_); auto inferred_status_error3 = ShapeInference::InferWhileShape(bad_shape_3, body, result_shape); ASSERT_FALSE(inferred_status_error3.ok()); ASSERT_THAT(inferred_status_error3.status().error_message(), - HasSubstr("condition must return a boolean")); + HasSubstr("Condition must return a boolean")); auto bad_shape_4 = ShapeUtil::MakeProgramShape({result_shape}, vector_32_); auto inferred_status_error4 = @@ -1301,13 +1301,13 @@ TEST_F(ShapeInferenceTest, ConcatenateWithBadShapes) { ShapeInference::InferConcatOpShape({&vector_32_}, /*dimension=*/-1); ASSERT_FALSE(inferred_status_error2.ok()); ASSERT_THAT(inferred_status_error2.status().error_message(), - HasSubstr("dimension to concatenate along out of bounds: -1")); + HasSubstr("dimension out of bounds: -1")); auto inferred_status_error3 = ShapeInference::InferConcatOpShape({&vector_32_}, /*dimension=*/1); ASSERT_FALSE(inferred_status_error3.ok()); ASSERT_THAT(inferred_status_error3.status().error_message(), - HasSubstr("dimension to concatenate along out of bounds: 1")); + HasSubstr("dimension out of bounds: 1")); Shape tuple = ShapeUtil::MakeTupleShape({vector_32_}); auto inferred_status_error4 = 
ShapeInference::InferConcatOpShape( @@ -1315,21 +1315,20 @@ TEST_F(ShapeInferenceTest, ConcatenateWithBadShapes) { ASSERT_FALSE(inferred_status_error4.ok()); ASSERT_THAT( inferred_status_error4.status().error_message(), - HasSubstr("Expected non-tuple argument for operand of concatenation.")); + HasSubstr("Expected non-tuple argument for operand of concatenation")); const Shape vector_s32 = ShapeUtil::MakeShape(S32, {32}); auto inferred_status_error5 = ShapeInference::InferConcatOpShape( {&vector_32_, &vector_s32}, /*dimension=*/0); ASSERT_FALSE(inferred_status_error5.ok()); - ASSERT_THAT( - inferred_status_error5.status().error_message(), - HasSubstr("cannot concatenate arrays with different element types")); + ASSERT_THAT(inferred_status_error5.status().error_message(), + HasSubstr("concatenate arrays with different element types")); auto inferred_status_error6 = ShapeInference::InferConcatOpShape( {&matrix_32_48_, &matrix_32_64_}, /*dimension=*/0); ASSERT_FALSE(inferred_status_error6.ok()); ASSERT_THAT(inferred_status_error6.status().error_message(), - HasSubstr("cannot concatenate arrays that differ in " + HasSubstr("concatenate arrays that differ in " "dimensions other than the one being " "concatenated")); } @@ -1467,7 +1466,7 @@ TEST_F(ShapeInferenceTest, Conditional) { ShapeUtil::MakeProgramShape({vector_64_}, f32_)); EXPECT_FALSE(inferred_status_error0.ok()); EXPECT_THAT(inferred_status_error0.status().error_message(), - HasSubstr("predicate must be a boolean")); + HasSubstr("Predicate must be a boolean")); auto inferred_status_error1 = ShapeInference::InferConditionalShape( pred_, ShapeUtil::MakeTupleShape({f32_, vector_32_}), matrix_32_48_, diff --git a/tensorflow/compiler/xla/tests/broadcast_simple_test.cc b/tensorflow/compiler/xla/tests/broadcast_simple_test.cc index 03f5e08315..97095f1cc4 100644 --- a/tensorflow/compiler/xla/tests/broadcast_simple_test.cc +++ b/tensorflow/compiler/xla/tests/broadcast_simple_test.cc @@ -662,7 +662,7 @@ 
XLA_TEST_F(BroadcastSimpleTest, InvalidBinaryAndDegenerateBroadcasting) { auto result_status = Execute(&b, {}); EXPECT_FALSE(result_status.ok()); EXPECT_THAT(result_status.status().error_message(), - HasSubstr("broadcast dimension 0 mismatch")); + HasSubstr("dimension 0 mismatch")); } XLA_TEST_F(BroadcastSimpleTest, InvalidInDimensionBroadcasting) { @@ -675,7 +675,7 @@ XLA_TEST_F(BroadcastSimpleTest, InvalidInDimensionBroadcasting) { auto result_status = Execute(&b, {}); EXPECT_FALSE(result_status.ok()); EXPECT_THAT(result_status.status().error_message(), - HasSubstr("binary op BINOP_ADD with incompatible shapes")); + HasSubstr("op BINOP_ADD with incompatible shapes")); } XLA_TEST_F(BroadcastSimpleTest, InvalidDegenerateBroadcasting) { @@ -688,7 +688,7 @@ XLA_TEST_F(BroadcastSimpleTest, InvalidDegenerateBroadcasting) { auto result_status = Execute(&b, {}); EXPECT_FALSE(result_status.ok()); EXPECT_THAT(result_status.status().error_message(), - HasSubstr("binary op BINOP_ADD with incompatible shapes")); + HasSubstr("op BINOP_ADD with incompatible shapes")); } } // namespace diff --git a/tensorflow/compiler/xla/tests/concat_test.cc b/tensorflow/compiler/xla/tests/concat_test.cc index 1bcad5a3f3..fb0e9c724a 100644 --- a/tensorflow/compiler/xla/tests/concat_test.cc +++ b/tensorflow/compiler/xla/tests/concat_test.cc @@ -75,7 +75,7 @@ XLA_TEST_F(ConcatTest, CannotConcatR0WithR0) { StatusOr computation_status = builder.Build(); ASSERT_FALSE(computation_status.ok()); EXPECT_THAT(computation_status.status().ToString(), - HasSubstr("dimension to concatenate along out of bounds: 0")); + HasSubstr("out of bounds: 0")); } XLA_TEST_F(ConcatTest, Concat_R1_L0_With_R1_L0) { diff --git a/tensorflow/compiler/xla/tests/map_test.cc b/tensorflow/compiler/xla/tests/map_test.cc index 2b0f7e6e80..0cd812fd1b 100644 --- a/tensorflow/compiler/xla/tests/map_test.cc +++ b/tensorflow/compiler/xla/tests/map_test.cc @@ -531,7 +531,7 @@ TEST_F(MapTest, MapOperantionWithBuildError) { 
ASSERT_TRUE(!computation_status.ok()); EXPECT_THAT( computation_status.status().ToString(), - ::testing::HasSubstr("error from: ErrorAdd: binary op BINOP_ADD with " + ::testing::HasSubstr("error from: ErrorAdd: Binary op BINOP_ADD with " "different element types: f32[] and u16[]")); } -- GitLab From 1a15d58c8204b145c545b27efdd0a1ca069cacdc Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 2 Mar 2018 14:00:07 -0800 Subject: [PATCH 275/311] [TF:XLA] Bump open source llvm revision to r326571 PiperOrigin-RevId: 187665541 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index ea8f42ab8d..1af246f9dc 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/9a6e78e4adc959d2825f7af35b4ed0e09394d840.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/9a6e78e4adc959d2825f7af35b4ed0e09394d840.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/193aea3782308c66a7a12f1c37520a1b4ff1dbd8.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/193aea3782308c66a7a12f1c37520a1b4ff1dbd8.tar.gz", ], - sha256 = "7990b4d446de971e0acc481942920452a182d2f87a8164bdc117fd9b9ace591d", - strip_prefix = "llvm-9a6e78e4adc959d2825f7af35b4ed0e09394d840", + sha256 = "2eda56deafb8da85bc23aa52fa1fb8c39da6a58c865e5216d0a0787bd09a09ed", + strip_prefix = "llvm-193aea3782308c66a7a12f1c37520a1b4ff1dbd8", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From d3ece65e340ca7cd00874c460cf9f3e631346921 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 2 Mar 2018 14:33:39 -0800 Subject: [PATCH 276/311] Checkpointable: Have MultiRNNCell add its dependent cells as dependencies PiperOrigin-RevId: 187670464 --- .../contrib/rnn/python/kernel_tests/core_rnn_cell_test.py 
| 2 ++ tensorflow/python/ops/rnn_cell_impl.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index 0e62b315b6..d41fc0b3ac 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -187,6 +187,8 @@ class RNNCellTest(test.TestCase): ], state_is_tuple=False) self.assertEqual(cell.dtype, None) + self.assertEqual("cell-0", cell._checkpoint_dependencies[0].name) + self.assertEqual("cell-1", cell._checkpoint_dependencies[1].name) g, out_m = cell(x, m) # Layer infers the input type. self.assertEqual(cell.dtype, dtype.name) diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index 923348ea44..bd7c731210 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -1187,6 +1187,10 @@ class MultiRNNCell(RNNCell): "cells must be a list or tuple, but saw: %s." % cells) self._cells = cells + for cell_number, cell in enumerate(self._cells): + # Add Checkpointable dependencies on these cells so their variables get + # saved with this object when using object-based saving. + self._track_checkpointable(cell, name="cell-%d" % (cell_number,)) self._state_is_tuple = state_is_tuple if not state_is_tuple: if any(nest.is_sequence(c.state_size) for c in self._cells): -- GitLab From 16f74956eb75511f1bf47a62a998ed9a434a8249 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 14:55:26 -0800 Subject: [PATCH 277/311] Add a small helper which is useful for quicker debugging. 
PiperOrigin-RevId: 187673654 --- tensorflow/contrib/py2tf/pyct/transformer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/contrib/py2tf/pyct/transformer.py b/tensorflow/contrib/py2tf/pyct/transformer.py index 877d52af01..57016bb4ce 100644 --- a/tensorflow/contrib/py2tf/pyct/transformer.py +++ b/tensorflow/contrib/py2tf/pyct/transformer.py @@ -44,6 +44,12 @@ class Base(gast.NodeTransformer): self._col_offset = 0 self.context = context + def debug_print(self, node): + """Helper method useful for debugging.""" + if __debug__: + print(pretty_printer.fmt(node)) + return node + def visit(self, node): source_code = self.context.source_code source_file = self.context.source_file -- GitLab From 809c84dc3a6252efab2b366f167135ed7826dee7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 15:06:13 -0800 Subject: [PATCH 278/311] Begin a library for statistical testing of samplers. So far, it consists of one-sample and two-sample equality-of-means assertions, and power analysis and experimental design for those, because that's what was needed for testing the LKJ distribution. If this API shape proves viable, more to come. PiperOrigin-RevId: 187675337 --- tensorflow/contrib/distributions/BUILD | 13 + .../kernel_tests/statistical_testing_test.py | 166 ++++ .../python/ops/statistical_testing.py | 728 ++++++++++++++++++ 3 files changed, 907 insertions(+) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/statistical_testing.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index ed79ef70f8..1b4877c57f 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -474,6 +474,19 @@ cuda_py_test( tags = ["nomsan"], # disable to avoid false positives from scipy. 
) +cuda_py_test( + name = "statistical_testing_test", + size = "medium", + srcs = [ + "python/kernel_tests/statistical_testing_test.py", + ], + additional_deps = [ + ":distributions_py", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + ], +) + cuda_py_test( name = "vector_sinh_arcsinh_diag_test", size = "medium", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py new file mode 100644 index 0000000000..3548ac1807 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py @@ -0,0 +1,166 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the statistical testing library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops import statistical_testing as st +from tensorflow.python.framework import errors +from tensorflow.python.ops import check_ops +from tensorflow.python.platform import test + + +class StatisticalTestingTest(test.TestCase): + + def test_dkwm_design_mean_one_sample_soundness(self): + numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] + rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] + with self.test_session() as sess: + for ff in rates: + for fp in rates: + sufficient_n = st.min_num_samples_for_dkwm_mean_test( + numbers, 0., 1., false_fail_rate=ff, false_pass_rate=fp) + detectable_d = st.min_discrepancy_of_true_means_detectable_by_dkwm( + sufficient_n, 0., 1., false_fail_rate=ff, false_pass_rate=fp) + sess.run(check_ops.assert_less_equal(detectable_d, numbers)) + + def test_dkwm_design_mean_two_sample_soundness(self): + numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] + rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] 
+ with self.test_session() as sess: + for ff in rates: + for fp in rates: + (sufficient_n1, + sufficient_n2) = st.min_num_samples_for_dkwm_mean_two_sample_test( + numbers, 0., 1., 0., 1., + false_fail_rate=ff, false_pass_rate=fp) + d_fn = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample + detectable_d = d_fn( + sufficient_n1, 0., 1., sufficient_n2, 0., 1., + false_fail_rate=ff, false_pass_rate=fp) + sess.run(check_ops.assert_less_equal(detectable_d, numbers)) + + def test_true_mean_confidence_interval_by_dkwm_one_sample(self): + rng = np.random.RandomState(seed=0) + + num_samples = 5000 + # 5000 samples is chosen to be enough to find discrepancies of + # size 0.1 or more with assurance 1e-6, as confirmed here: + with self.test_session() as sess: + d = st.min_discrepancy_of_true_means_detectable_by_dkwm( + num_samples, 0., 1., false_fail_rate=1e-6, false_pass_rate=1e-6) + d = sess.run(d) + self.assertLess(d, 0.1) + + # Test that the confidence interval computed for the mean includes + # 0.5 and excludes 0.4 and 0.6. + with self.test_session() as sess: + samples = rng.uniform(size=num_samples).astype(np.float32) + (low, high) = st.true_mean_confidence_interval_by_dkwm( + samples, 0., 1., error_rate=1e-6) + low, high = sess.run([low, high]) + self.assertGreater(low, 0.4) + self.assertLess(low, 0.5) + self.assertGreater(high, 0.5) + self.assertLess(high, 0.6) + + def test_dkwm_mean_one_sample_assertion(self): + rng = np.random.RandomState(seed=0) + num_samples = 5000 + + # Test that the test assertion agrees that the mean of the standard + # uniform distribution is 0.5. + samples = rng.uniform(size=num_samples).astype(np.float32) + with self.test_session() as sess: + sess.run(st.assert_true_mean_equal_by_dkwm( + samples, 0., 1., 0.5, false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is not 0.4. 
+ with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.assert_true_mean_equal_by_dkwm( + samples, 0., 1., 0.4, false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is not 0.6. + with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.assert_true_mean_equal_by_dkwm( + samples, 0., 1., 0.6, false_fail_rate=1e-6)) + + def test_dkwm_mean_two_sample_assertion(self): + rng = np.random.RandomState(seed=0) + num_samples = 15000 + + # 15000 samples is chosen to be enough to find discrepancies of + # size 0.1 or more with assurance 1e-6, as confirmed here: + with self.test_session() as sess: + d = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( + num_samples, 0., 1., num_samples, 0., 1., + false_fail_rate=1e-6, false_pass_rate=1e-6) + d = sess.run(d) + self.assertLess(d, 0.1) + + # Test that the test assertion agrees that the standard + # uniform distribution has the same mean as itself. + samples1 = rng.uniform(size=num_samples).astype(np.float32) + samples2 = rng.uniform(size=num_samples).astype(np.float32) + with self.test_session() as sess: + sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( + samples1, 0., 1., samples2, 0., 1., false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is different from the mean of beta(2, 1). + beta_high_samples = rng.beta(2, 1, size=num_samples).astype(np.float32) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( + samples1, 0., 1., + beta_high_samples, 0., 1., + false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is different from the mean of beta(1, 2). 
+ beta_low_samples = rng.beta(1, 2, size=num_samples).astype(np.float32) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( + samples1, 0., 1., + beta_low_samples, 0., 1., + false_fail_rate=1e-6)) + + def test_dkwm_argument_validity_checking(self): + rng = np.random.RandomState(seed=0) + samples = rng.uniform(size=5000).astype(np.float32) + + # Test that the test library complains if the given samples fall + # outside the purported bounds. + with self.test_session() as sess: + with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.true_mean_confidence_interval_by_dkwm( + samples, 0., 0.5, error_rate=0.5)) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.true_mean_confidence_interval_by_dkwm( + samples, 0.5, 1., error_rate=0.5)) + + # But doesn't complain if they don't. + op = st.true_mean_confidence_interval_by_dkwm( + samples, 0., 1., error_rate=0.5) + _ = sess.run(op) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/statistical_testing.py b/tensorflow/contrib/distributions/python/ops/statistical_testing.py new file mode 100644 index 0000000000..d66c34cc1a --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/statistical_testing.py @@ -0,0 +1,728 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Statistical test assertions calibrated for their error rates.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops + +__all__ = [ + "true_mean_confidence_interval_by_dkwm", + "assert_true_mean_equal_by_dkwm", + "min_discrepancy_of_true_means_detectable_by_dkwm", + "min_num_samples_for_dkwm_mean_test", + "assert_true_mean_equal_by_dkwm_two_sample", + "min_discrepancy_of_true_means_detectable_by_dkwm_two_sample", + "min_num_samples_for_dkwm_mean_two_sample_test", +] + + +def _batch_sort_vector(x, ascending=True, name=None): + with ops.name_scope(name, "sort_each_row", [x]): + x = ops.convert_to_tensor(x, name="x") + n = array_ops.shape(x)[-1] + if ascending: + y, _ = nn_ops.top_k(-x, k=n, sorted=True) + y = -y + else: + y, _ = nn_ops.top_k(x, k=n, sorted=True) + y.set_shape(x.shape) + return y + + +def _do_maximum_mean(samples, envelope, high, name=None): + """Common code between maximum_mean and minimum_mean.""" + with ops.name_scope(name, "do_maximum_mean", [samples, envelope, high]): + n = array_ops.rank(samples) + # Move the batch dimension of `samples` to the rightmost position, + # where the _batch_sort_vector function wants it. + perm = array_ops.concat([math_ops.range(1, n), [0]], axis=0) + samples = array_ops.transpose(samples, perm) + + samples = _batch_sort_vector(samples) + batch_shape = array_ops.shape(samples)[:-1] + n = array_ops.shape(samples)[-1] + step = 1. 
/ math_ops.cast(n, dtype=samples.dtype.base_dtype) + + def _loop_body(iter_, total, to_skip): + total = array_ops.where( + step <= to_skip, + total, + array_ops.where( + to_skip > 0., + total + (step - to_skip) * samples[..., iter_], + total + step * samples[..., iter_])) + to_skip = array_ops.where(step <= to_skip, to_skip - step, 0.) + return [iter_ + 1, total, to_skip] + + _, total, _ = control_flow_ops.while_loop( + cond=lambda iter_, *args: iter_ < n, + body=_loop_body, + loop_vars=[ + 0, + array_ops.zeros(batch_shape, dtype=samples.dtype.base_dtype), + envelope, # to_skip + ]) + + return total + envelope * high + + +def _maximum_mean(samples, envelope, high, name=None): + """Returns a stochastic upper bound on the mean of a scalar distribution. + + The idea is that if the true CDF is within an `eps`-envelope of the + empirical CDF of the samples, and the support is bounded above, then + the mean is bounded above as well. In symbols, + + ```none + sup_x(|F_n(x) - F(x)|) < eps + ``` + + The 0th dimension of `samples` is interpreted as independent and + identically distributed samples. The remaining dimensions are + broadcast together with `envelope` and `high`, and operated on + separately. + + Args: + samples: Floating-point tensor of samples from the distribution(s) + of interest. Entries are assumed IID across the 0th dimension. + The other dimensions must broadcast with `envelope` and `high`. + envelope: Floating-point tensor of sizes of admissible CDF + envelopes (i.e., the `eps` above). + high: Floating-point tensor of upper bounds on the distributions' + supports. + name: A name for this operation (optional). + + Returns: + bound: Floating-point tensor of upper bounds on the true means. + + Raises: + InvalidArgumentError: If some `sample` is found to be larger than + the corresponding `high`. 
+ """ + with ops.name_scope(name, "maximum_mean", [samples, envelope, high]): + samples = ops.convert_to_tensor(samples, name="samples") + envelope = ops.convert_to_tensor(envelope, name="envelope") + high = ops.convert_to_tensor(high, name="high") + + xmax = math_ops.reduce_max(samples, axis=[-1]) + msg = "Given sample maximum value exceeds expectations" + check_op = check_ops.assert_less_equal(xmax, high, message=msg) + with ops.control_dependencies([check_op]): + return array_ops.identity(_do_maximum_mean(samples, envelope, high)) + + +def _minimum_mean(samples, envelope, low, name=None): + """Returns a stochastic lower bound on the mean of a scalar distribution. + + The idea is that if the true CDF is within an `eps`-envelope of the + empirical CDF of the samples, and the support is bounded below, then + the mean is bounded below as well. In symbols, + + ```none + sup_x(|F_n(x) - F(x)|) < eps + ``` + + The 0th dimension of `samples` is interpreted as independent and + identically distributed samples. The remaining dimensions are + broadcast together with `envelope` and `low`, and operated on + separately. + + Args: + samples: Floating-point tensor of samples from the distribution(s) + of interest. Entries are assumed IID across the 0th dimension. + The other dimensions must broadcast with `envelope` and `low`. + envelope: Floating-point tensor of sizes of admissible CDF + envelopes (i.e., the `eps` above). + low: Floating-point tensor of lower bounds on the distributions' + supports. + name: A name for this operation (optional). + + Returns: + bound: Floating-point tensor of lower bounds on the true means. + + Raises: + InvalidArgumentError: If some `sample` is found to be smaller than + the corresponding `low`. 
+ """ + with ops.name_scope(name, "minimum_mean", [samples, envelope, low]): + samples = ops.convert_to_tensor(samples, name="samples") + envelope = ops.convert_to_tensor(envelope, name="envelope") + low = ops.convert_to_tensor(low, name="low") + + xmin = math_ops.reduce_min(samples, axis=[-1]) + msg = "Given sample minimum value falls below expectations" + check_op = check_ops.assert_greater_equal(xmin, low, message=msg) + with ops.control_dependencies([check_op]): + return - _do_maximum_mean(-samples, envelope, -low) + + +def _dkwm_cdf_envelope(n, error_rate, name=None): + """Computes the CDF envelope that the DKWM inequality licenses. + + The [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval) + gives a stochastic bound on the distance between the true cumulative + distribution function (CDF) of any distribution and its empirical + CDF. To wit, for `n` iid samples from any distribution with CDF F, + + ```none + P(sup_x |F_n(x) - F(x)| > eps) < 2exp(-2n eps^2) + ``` + + This function computes the envelope size `eps` as a function of the + number of samples `n` and the desired limit on the left-hand + probability above. + + Args: + n: Tensor of numbers of samples drawn. + error_rate: Floating-point tensor of admissible rates of mistakes. + name: A name for this operation (optional). + + Returns: + eps: Tensor of maximum distances the true CDF can be from the + empirical CDF. This scales as `O(sqrt(-log(error_rate)))` and + as `O(1 / sqrt(n))`. The shape is the broadcast of `n` and + `error_rate`. + """ + with ops.name_scope(name, "dkwm_cdf_envelope", [n, error_rate]): + n = math_ops.cast(n, dtype=error_rate.dtype) + return math_ops.sqrt(-gen_math_ops.log(error_rate / 2.) / (2. * n)) + + +def _check_shape_dominates(tensor, tensors): + """Check that broadcasting `tensor` against `tensors` does not expand it. + + Why? 
Because I want to be very sure that the samples tensor is not + accidentally enlarged by broadcasting against tensors that are + supposed to be describing the distribution(s) sampled from, lest the + sample counts end up inflated. + + Args: + tensor: A Tensor whose shape is to be protected against broadcasting. + tensors: A list of Tensors to check + + Returns: + tensor: `tf.identity(tensor)` with control dependencies attached; + be sure to use that downstream. + """ + def check(t): + target = array_ops.shape(tensor)[1:] + result = array_ops.broadcast_dynamic_shape(target, array_ops.shape(t)) + # This rank check ensures that I don't get a wrong answer from the + # _shapes_ broadcasting against each other. + gt = check_ops.assert_greater(array_ops.rank(target), array_ops.rank(t)) + eq = check_ops.assert_equal(target, result) + return gt, eq + checks = list(itertools.chain(*[check(t) for t in tensors])) + with ops.control_dependencies(checks): + return array_ops.identity(array_ops.identity(tensor)) + + +def true_mean_confidence_interval_by_dkwm( + samples, low, high, error_rate=1e-6, name=None): + """Computes a confidence interval for the mean of a scalar distribution. + + In batch mode, computes confidence intervals for all distributions + in the batch (which need not be identically distributed). + + Relies on the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). + + The probability (over the randomness of drawing the given samples) + that any true mean is outside the corresponding returned interval is + no more than the given `error_rate`. The size of the intervals + scale as + `O(1 / sqrt(#samples))`, as `O(high - low)`, and as `O(-log(error_rate))`. + + Note that `error_rate` is a total error rate for all the confidence + intervals in the batch. As such, if the batch is nontrivial, the + error rate is not broadcast but divided (evenly) among the batch + members. 
+ + Args: + samples: Floating-point tensor of samples from the distribution(s) + of interest. Entries are assumed IID across the 0th dimension. + The other dimensions must broadcast with `low` and `high`. + low: Floating-point tensor of lower bounds on the distributions' + supports. + high: Floating-point tensor of upper bounds on the distributions' + supports. + error_rate: *Scalar* admissible total rate of mistakes. + name: A name for this operation (optional). + + Returns: + low: A floating-point tensor of stochastic lower bounds on the true means. + high: A floating-point tensor of stochastic upper bounds on the true means. + """ + with ops.name_scope( + name, "true_mean_confidence_interval_by_dkwm", + [samples, low, high, error_rate]): + samples = ops.convert_to_tensor(samples, name="samples") + low = ops.convert_to_tensor(low, name="low") + high = ops.convert_to_tensor(high, name="high") + error_rate = ops.convert_to_tensor(error_rate, name="error_rate") + samples = _check_shape_dominates(samples, [low, high]) + check_ops.assert_scalar(error_rate) # Static shape + error_rate = _itemwise_error_rate(error_rate, [low, high], samples) + n = array_ops.shape(samples)[0] + envelope = _dkwm_cdf_envelope(n, error_rate) + min_mean = _minimum_mean(samples, envelope, low) + max_mean = _maximum_mean(samples, envelope, high) + return min_mean, max_mean + + +def _itemwise_error_rate( + total_error_rate, param_tensors, sample_tensor=None, name=None): + with ops.name_scope( + name, "itemwise_error_rate", + [total_error_rate, param_tensors, sample_tensor]): + result_shape = [1] + for p_tensor in param_tensors: + result_shape = array_ops.broadcast_dynamic_shape( + array_ops.shape(p_tensor), result_shape) + if sample_tensor is not None: + result_shape = array_ops.broadcast_dynamic_shape( + array_ops.shape(sample_tensor)[1:], result_shape) + num_items = math_ops.reduce_prod(result_shape) + return total_error_rate / math_ops.cast( + num_items, dtype=total_error_rate.dtype) + + 
+def assert_true_mean_equal_by_dkwm( + samples, low, high, expected, false_fail_rate=1e-6, name=None): + """Asserts the mean of the given distribution is as expected. + + More precisely, fails if there is enough evidence (using the + [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval)) + that the true mean of some distribution from which the given samples are + drawn is _not_ the given expected mean with statistical significance + `false_fail_rate` or stronger, otherwise passes. If you also want to + check that you are gathering enough evidence that a pass is not + spurious, see `min_num_samples_for_dkwm_mean_test` and + `min_discrepancy_of_true_means_detectable_by_dkwm`. + + Note that `false_fail_rate` is a total false failure rate for all + the assertions in the batch. As such, if the batch is nontrivial, + the assertion will insist on stronger evidence to fail any one member. + + Args: + samples: Floating-point tensor of samples from the distribution(s) + of interest. Entries are assumed IID across the 0th dimension. + The other dimensions must broadcast with `low` and `high`. + low: Floating-point tensor of lower bounds on the distributions' + supports. + high: Floating-point tensor of upper bounds on the distributions' + supports. + expected: Floating-point tensor of expected true means. + false_fail_rate: *Scalar* admissible total rate of mistakes. + name: A name for this operation (optional). + + Returns: + check: Op that raises `InvalidArgumentError` if any expected mean is + outside the corresponding confidence interval. 
+ """ + with ops.name_scope( + name, "assert_true_mean_equal_by_dkwm", + [samples, low, high, expected, false_fail_rate]): + samples = ops.convert_to_tensor(samples, name="samples") + low = ops.convert_to_tensor(low, name="low") + high = ops.convert_to_tensor(high, name="high") + expected = ops.convert_to_tensor(expected, name="expected") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + samples = _check_shape_dominates(samples, [low, high, expected]) + min_mean, max_mean = true_mean_confidence_interval_by_dkwm( + samples, low, high, error_rate=false_fail_rate) + less_op = check_ops.assert_less( + min_mean, expected, message="Mean confidence interval too high") + with ops.control_dependencies([less_op]): + return check_ops.assert_greater( + max_mean, expected, message="Mean confidence interval too low") + + +def min_discrepancy_of_true_means_detectable_by_dkwm( + n, low, high, false_fail_rate, false_pass_rate, name=None): + """Returns the minimum mean discrepancy that a DKWM-based test can detect. + + DKWM is the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). + + Note that `false_fail_rate` is a total false failure rate for all + the tests in the batch. As such, if the batch is nontrivial, each + member will demand more samples. The `false_pass_rate` is also + interpreted as a total, but is treated asymmetrically: If each test + in the batch detects its corresponding discrepancy with probability + at least `1 - false_pass_rate`, then running all those tests and + failing if any one fails will jointly detect all those discrepancies + with the same `false_pass_rate`. + + Args: + n: Tensor of numbers of samples to be drawn from the distributions + of interest. + low: Floating-point tensor of lower bounds on the distributions' + supports. + high: Floating-point tensor of upper bounds on the distributions' + supports. 
+ false_fail_rate: *Scalar* admissible total rate of false failures. + false_pass_rate: *Scalar* admissible rate of false passes. + name: A name for this operation (optional). + + Returns: + discr: Tensor of lower bounds on the distances between true + means detectable by a DKWM-based test. + + For each batch member `i`, of `K` total, drawing `n[i]` samples from + some scalar distribution supported on `[low[i], high[i]]` is enough + to detect a difference in means of size `discr[i]` or more. + Specifically, we guarantee that (a) if the true mean is the expected + mean, `assert_true_mean_equal_by_dkwm` will fail with probability at + most `false_fail_rate / K` (which amounts to `false_fail_rate` if + applied to the whole batch at once), and (b) if the true mean + differs from the expected mean by at least `discr[i]`, + `assert_true_mean_equal_by_dkwm` will pass with probability at most + `false_pass_rate`. + + The detectable discrepancy scales as + + - `O(high[i] - low[i])`, + - `O(1 / sqrt(n[i]))`, + - `O(-log(false_fail_rate/K))`, and + - `O(-log(false_pass_rate))`. + """ + with ops.name_scope( + name, "min_discrepancy_of_true_means_detectable_by_dkwm", + [n, low, high, false_fail_rate, false_pass_rate]): + n = ops.convert_to_tensor(n, name="n") + low = ops.convert_to_tensor(low, name="low") + high = ops.convert_to_tensor(high, name="high") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + false_pass_rate = ops.convert_to_tensor( + false_pass_rate, name="false_pass_rate") + # Algorithm: Assume a true CDF F. The DKWM inequality gives a + # stochastic bound on how far the observed empirical CDF F_n can be. + # Then, using the DKWM inequality again gives a stochastic bound on + # the farthest candidate true CDF F' that + # true_mean_confidence_interval_by_dkwm might consider. At worst, these + # errors may go in the same direction, so the distance between F and + # F' is bounded by the sum. 
+ # On batching: false fail rates sum, so I need to reduce + # the input to account for the batching. False pass rates + # max, so I don't. + sampling_envelope = _dkwm_cdf_envelope(n, false_pass_rate) + false_fail_rate = _itemwise_error_rate(false_fail_rate, [n, low, high]) + analysis_envelope = _dkwm_cdf_envelope(n, false_fail_rate) + return (high - low) * (sampling_envelope + analysis_envelope) + + +def min_num_samples_for_dkwm_mean_test( + discrepancy, low, high, + false_fail_rate=1e-6, false_pass_rate=1e-6, name=None): + """Returns how many samples suffice for a one-sample DKWM mean test. + + To wit, returns an upper bound on the number of samples necessary to + guarantee detecting a mean difference of at least the given + `discrepancy`, with the given `false_fail_rate` and `false_pass_rate`, + using the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval) + on a scalar distribution supported on `[low, high]`. + + Args: + discrepancy: Floating-point tensor of desired upper limits on mean + differences that may go undetected with probability higher than + `1 - false_pass_rate`. + low: Tensor of lower bounds on the distributions' support. + high: Tensor of upper bounds on the distributions' support. + false_fail_rate: *Scalar* admissible total rate of false failures. + false_pass_rate: *Scalar* admissible rate of false passes. + name: A name for this operation (optional). + + Returns: + n: Tensor of numbers of samples to be drawn from the distributions + of interest. + + The `discrepancy`, `low`, and `high` tensors must have + broadcast-compatible shapes. + + For each batch member `i`, of `K` total, drawing `n[i]` samples from + some scalar distribution supported on `[low[i], high[i]]` is enough + to detect a difference in means of size `discrepancy[i]` or more. 
+ Specifically, we guarantee that (a) if the true mean is the expected + mean, `assert_true_mean_equal_by_dkwm` will fail with probability at + most `false_fail_rate / K` (which amounts to `false_fail_rate` if + applied to the whole batch at once), and (b) if the true mean + differs from the expected mean by at least `discrepancy[i]`, + `assert_true_mean_equal_by_dkwm` will pass with probability at most + `false_pass_rate`. + + The required number of samples scales + as `O((high[i] - low[i])**2)`, `O(-log(false_fail_rate/K))`, + `O(-log(false_pass_rate))`, and `O(1 / discrepancy[i]**2)`. + """ + with ops.name_scope( + name, "min_num_samples_for_dkwm_mean_test", + [low, high, false_fail_rate, false_pass_rate, discrepancy]): + discrepancy = ops.convert_to_tensor( + discrepancy, name="discrepancy") + low = ops.convert_to_tensor(low, name="low") + high = ops.convert_to_tensor(high, name="high") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + false_pass_rate = ops.convert_to_tensor( + false_pass_rate, name="false_pass_rate") + # Could choose to cleverly allocate envelopes, but this is sound. + envelope1 = discrepancy / (2. * (high - low)) + envelope2 = envelope1 + false_fail_rate = _itemwise_error_rate( + false_fail_rate, [low, high, discrepancy]) + n1 = -math_ops.log(false_fail_rate / 2.) / (2. * envelope1**2) + n2 = -math_ops.log(false_pass_rate / 2.) / (2. * envelope2**2) + return math_ops.maximum(n1, n2) + + +def assert_true_mean_equal_by_dkwm_two_sample( + samples1, low1, high1, samples2, low2, high2, + false_fail_rate=1e-6, name=None): + """Asserts the means of the given distributions are equal. 
+ + More precisely, fails if there is enough evidence (using the + [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval)) + that the means of the distributions from which the given samples are + drawn are _not_ equal with statistical significance `false_fail_rate` + or stronger, otherwise passes. If you also want to check that you + are gathering enough evidence that a pass is not spurious, see + `min_num_samples_for_dkwm_mean_two_sample_test` and + `min_discrepancy_of_true_means_detectable_by_dkwm_two_sample`. + + Note that `false_fail_rate` is a total false failure rate for all + the assertions in the batch. As such, if the batch is nontrivial, + the assertion will insist on stronger evidence to fail any one member. + + Args: + samples1: Floating-point tensor of samples from the + distribution(s) A. Entries are assumed IID across the 0th + dimension. The other dimensions must broadcast with `low1`, + `high1`, `low2`, and `high2`. + low1: Floating-point tensor of lower bounds on the supports of the + distributions A. + high1: Floating-point tensor of upper bounds on the supports of + the distributions A. + samples2: Floating-point tensor of samples from the + distribution(s) B. Entries are assumed IID across the 0th + dimension. The other dimensions must broadcast with `low1`, + `high1`, `low2`, and `high2`. + low2: Floating-point tensor of lower bounds on the supports of the + distributions B. + high2: Floating-point tensor of upper bounds on the supports of + the distributions B. + false_fail_rate: *Scalar* admissible total rate of mistakes. + name: A name for this operation (optional). + + Returns: + check: Op that raises `InvalidArgumentError` if any pair of confidence + intervals true for corresponding true means do not overlap. 
+ """ + with ops.name_scope( + name, "assert_true_mean_equal_by_dkwm_two_sample", + [samples1, low1, high1, samples2, low2, high2, false_fail_rate]): + samples1 = ops.convert_to_tensor(samples1, name="samples1") + low1 = ops.convert_to_tensor(low1, name="low1") + high1 = ops.convert_to_tensor(high1, name="high1") + samples2 = ops.convert_to_tensor(samples2, name="samples2") + low2 = ops.convert_to_tensor(low2, name="low2") + high2 = ops.convert_to_tensor(high2, name="high2") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + samples1 = _check_shape_dominates(samples1, [low1, high1]) + samples2 = _check_shape_dominates(samples2, [low2, high2]) + compatible_samples = check_ops.assert_equal( + array_ops.shape(samples1)[1:], array_ops.shape(samples2)[1:]) + with ops.control_dependencies([compatible_samples]): + # Could in principle play games with cleverly allocating + # significance instead of the even split below. It may be possible + # to get tighter intervals, in order to obtain a higher power test. + # Any allocation strategy that depends only on the support bounds + # and sample counts should be valid; however, because the intervals + # scale as O(-log(false_fail_rate)), there doesn't seem to be much + # room to win. + min_mean_1, max_mean_1 = true_mean_confidence_interval_by_dkwm( + samples1, low1, high1, false_fail_rate / 2.) + min_mean_2, max_mean_2 = true_mean_confidence_interval_by_dkwm( + samples2, low2, high2, false_fail_rate / 2.) + # I want to assert + # not (max_mean_1 < min_mean_2 or min_mean_1 > max_mean_2), + # but I think I only have and-combination of asserts, so use DeMorgan. 
+ clause1_op = check_ops.assert_greater_equal(max_mean_1, min_mean_2) + with ops.control_dependencies([clause1_op]): + return check_ops.assert_less_equal(min_mean_1, max_mean_2) + + +def min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( + n1, low1, high1, n2, low2, high2, + false_fail_rate, false_pass_rate, name=None): + """Returns the minimum mean discrepancy for a two-sample DKWM-based test. + + DKWM is the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). + + Note that `false_fail_rate` is a total false failure rate for all + the tests in the batch. As such, if the batch is nontrivial, each + member will demand more samples. The `false_pass_rate` is also + interpreted as a total, but is treated asymmetrically: If each test + in the batch detects its corresponding discrepancy with probability + at least `1 - false_pass_rate`, then running all those tests and + failing if any one fails will jointly detect all those discrepancies + with the same `false_pass_rate`. + + Args: + n1: Tensor of numbers of samples to be drawn from the distributions A. + low1: Floating-point tensor of lower bounds on the supports of the + distributions A. + high1: Floating-point tensor of upper bounds on the supports of + the distributions A. + n2: Tensor of numbers of samples to be drawn from the distributions B. + low2: Floating-point tensor of lower bounds on the supports of the + distributions B. + high2: Floating-point tensor of upper bounds on the supports of + the distributions B. + false_fail_rate: *Scalar* admissible total rate of false failures. + false_pass_rate: *Scalar* admissible rate of false passes. + name: A name for this operation (optional). + + Returns: + discr: Tensor of lower bounds on the distances between true means + detectable by a two-sample DKWM-based test. 
+ + For each batch member `i`, of `K` total, drawing `n1[i]` samples + from scalar distribution A supported on `[low1[i], high1[i]]` and `n2[i]` + samples from scalar distribution B supported on `[low2[i], high2[i]]` + is enough to detect a difference in their true means of size + `discr[i]` or more. Specifically, we guarantee that (a) if their + true means are equal, `assert_true_mean_equal_by_dkwm_two_sample` + will fail with probability at most `false_fail_rate/K` (which + amounts to `false_fail_rate` if applied to the whole batch at once), + and (b) if their true means differ by at least `discr[i]`, + `assert_true_mean_equal_by_dkwm_two_sample` will pass with + probability at most `false_pass_rate`. + + The detectable discrepancy scales as + + - `O(high1[i] - low1[i])`, `O(high2[i] - low2[i])`, + - `O(1 / sqrt(n1[i]))`, `O(1 / sqrt(n2[i]))`, + - `O(-log(false_fail_rate/K))`, and + - `O(-log(false_pass_rate))`. + """ + with ops.name_scope( + name, "min_discrepancy_of_true_means_detectable_by_dkwm_two_sample", + [n1, low1, high1, n2, low2, high2, false_fail_rate, false_pass_rate]): + n1 = ops.convert_to_tensor(n1, name="n1") + low1 = ops.convert_to_tensor(low1, name="low1") + high1 = ops.convert_to_tensor(high1, name="high1") + n2 = ops.convert_to_tensor(n2, name="n2") + low2 = ops.convert_to_tensor(low2, name="low2") + high2 = ops.convert_to_tensor(high2, name="high2") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + false_pass_rate = ops.convert_to_tensor( + false_pass_rate, name="false_pass_rate") + det_disc1 = min_discrepancy_of_true_means_detectable_by_dkwm( + n1, low1, high1, false_fail_rate / 2., false_pass_rate / 2.) + det_disc2 = min_discrepancy_of_true_means_detectable_by_dkwm( + n2, low2, high2, false_fail_rate / 2., false_pass_rate / 2.) 
+ return det_disc1 + det_disc2 + + +def min_num_samples_for_dkwm_mean_two_sample_test( + discrepancy, low1, high1, low2, high2, + false_fail_rate=1e-6, false_pass_rate=1e-6, name=None): + """Returns how many samples suffice for a two-sample DKWM mean test. + + DKWM is the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). + + Args: + discrepancy: Floating-point tensor of desired upper limits on mean + differences that may go undetected with probability higher than + `1 - false_pass_rate`. + low1: Floating-point tensor of lower bounds on the supports of the + distributions A. + high1: Floating-point tensor of upper bounds on the supports of + the distributions A. + low2: Floating-point tensor of lower bounds on the supports of the + distributions B. + high2: Floating-point tensor of upper bounds on the supports of + the distributions B. + false_fail_rate: *Scalar* admissible total rate of false failures. + false_pass_rate: *Scalar* admissible rate of false passes. + name: A name for this operation (optional). + + Returns: + n1: Tensor of numbers of samples to be drawn from the distributions A. + n2: Tensor of numbers of samples to be drawn from the distributions B. + + For each batch member `i`, of `K` total, drawing `n1[i]` samples + from scalar distribution A supported on `[low1[i], high1[i]]` and `n2[i]` + samples from scalar distribution B supported on `[low2[i], high2[i]]` + is enough to detect a difference in their true means of size + `discrepancy[i]` or more. Specifically, we guarantee that (a) if their + true means are equal, `assert_true_mean_equal_by_dkwm_two_sample` + will fail with probability at most `false_fail_rate/K` (which + amounts to `false_fail_rate` if applied to the whole batch at once), + and (b) if their true means differ by at least `discrepancy[i]`, + `assert_true_mean_equal_by_dkwm_two_sample` will pass with + probability at most `false_pass_rate`. 
+ + The required number of samples scales as + + - `O((high1[i] - low1[i])**2)`, `O((high2[i] - low2[i])**2)`, + - `O(-log(false_fail_rate/K))`, + - `O(-log(false_pass_rate))`, and + - `O(1 / discrepancy[i]**2)`. + """ + with ops.name_scope( + name, "min_num_samples_for_dkwm_mean_two_sample_test", + [low1, high1, low2, high2, + false_fail_rate, false_pass_rate, discrepancy]): + discrepancy = ops.convert_to_tensor(discrepancy, name="discrepancy") + low1 = ops.convert_to_tensor(low1, name="low1") + high1 = ops.convert_to_tensor(high1, name="high1") + low2 = ops.convert_to_tensor(low2, name="low2") + high2 = ops.convert_to_tensor(high2, name="high2") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + false_pass_rate = ops.convert_to_tensor( + false_pass_rate, name="false_pass_rate") + # Could choose to cleverly allocate discrepancy tolerances and + # failure probabilities, but this is sound. + n1 = min_num_samples_for_dkwm_mean_test( + discrepancy / 2., low1, high1, + false_fail_rate / 2., false_pass_rate / 2.) + n2 = min_num_samples_for_dkwm_mean_test( + discrepancy / 2., low2, high2, + false_fail_rate / 2., false_pass_rate / 2.) 
+ return n1, n2 -- GitLab From 6d014ecbd63fec208742b327b94c39afd4953fb8 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 2 Mar 2018 15:11:13 -0800 Subject: [PATCH 279/311] ReadVariableOp in C for eager (only for the fastpath) PiperOrigin-RevId: 187676012 --- tensorflow/python/eager/benchmarks_test.py | 21 + tensorflow/python/eager/pywrap_tfe.h | 7 + tensorflow/python/eager/pywrap_tfe_src.cc | 460 ++++++++++++------ tensorflow/python/eager/pywrap_tfe_test.py | 31 ++ .../python/ops/resource_variable_ops.py | 4 + tensorflow/python/pywrap_tfe.i | 1 + 6 files changed, 377 insertions(+), 147 deletions(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 527a919ab0..551d5647dd 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -275,6 +275,16 @@ class MicroBenchmarks(test.Benchmark): def _benchmark_read_variable(self, m, num_iters): self._run(m.value, num_iters) + def _benchmark_matmul_read_variable(self, m, num_iters): + self._benchmark_gen_math_ops_matmul( + m, transpose_b=False, num_iters=num_iters) + + def _benchmark_matmul_read_variable_with_tape(self, m, num_iters): + with backprop.GradientTape() as tape: + tape.watch(m) + self._benchmark_gen_math_ops_matmul( + m, transpose_b=False, num_iters=num_iters) + def _benchmark_read_variable_with_tape(self, m, num_iters): with backprop.GradientTape() as tape: tape.watch(m) @@ -416,6 +426,17 @@ class MicroBenchmarks(test.Benchmark): self._benchmark_defun_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) + def benchmark_matmul_read_variable_op_2_by_2_CPU(self): + with context.device(CPU): + m = resource_variable_ops.ResourceVariable(self._m_2_by_2) + self._benchmark_matmul_read_variable(m, num_iters=self._num_iters_2_by_2) + + def benchmark_matmul_read_variable_op_with_tape_2_by_2_CPU(self): + with context.device(CPU): + m = resource_variable_ops.ResourceVariable(self._m_2_by_2) + 
self._benchmark_matmul_read_variable_with_tape( + m, num_iters=self._num_iters_2_by_2) + def benchmark_read_variable_op_2_by_2_CPU(self): with context.device(CPU): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index b1b4a6b214..32d731d0f6 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -51,6 +51,13 @@ void TFE_Py_Execute(TFE_Context* ctx, const char* device_name, // This function is not thread-safe. PyObject* TFE_Py_RegisterExceptionClass(PyObject* e); +// Registers e as the type of the ResourceVariable class. +// Returns Py_None if registration succeeds, else throws a TypeError and returns +// NULL. +// +// This function is not thread-safe. +PyObject* TFE_Py_RegisterResourceVariableType(PyObject* e); + // Registers e as the Exception to be raised when the conditions of // TFE_Py_FastPathExecute_C have not been met. When this exception is set, it // is a signal to the calling code that it should fall back to the safer (and diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 42d97dfe3f..27c9d05081 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -38,6 +38,23 @@ using tensorflow::strings::Printf; namespace { +struct FastPathOpExecInfo { + TFE_Context* ctx; + const char* device_name; + // The op def of the main op being executed. + const tensorflow::OpDef* op_def; + + bool run_callbacks; + bool run_post_exec_callbacks; + bool run_gradient_callback; + + // The op name of the main op being executed. + PyObject* name; + // The op type name of the main op being executed. 
+ PyObject* op_name; + PyObject* callbacks; +}; + #define PARSE_VALUE(fn_name, type, check_fn, parse_fn) \ bool fn_name(const string& key, PyObject* py_value, TF_Status* status, \ type* value) { \ @@ -120,6 +137,11 @@ bool ParseTypeValue(const string& key, PyObject* py_value, TF_Status* status, PyObject* py_type_enum = PyObject_GetAttrString(py_value, "_type_enum"); if (py_type_enum == nullptr) { + TF_SetStatus( + status, TF_INVALID_ARGUMENT, + tensorflow::strings::StrCat("Expecting a DType.dtype for attr ", key, + ", got ", py_value->ob_type->tp_name) + .c_str()); return false; } @@ -580,6 +602,8 @@ PyObject* fallback_exception_class = nullptr; // Python function that returns a backward_function. PyObject* backward_function_getter = nullptr; +PyTypeObject* resource_variable_type = nullptr; + tensorflow::mutex _uid_mutex(tensorflow::LINKER_INITIALIZED); tensorflow::int64 _uid GUARDED_BY(_uid_mutex) = 0; @@ -628,11 +652,28 @@ PyObject* TFE_Py_RegisterExceptionClass(PyObject* e) { "TFE_Py_RegisterExceptionClass: " "Registered class should be subclass of Exception."); return nullptr; - } else { - Py_INCREF(e); - exception_class = e; - Py_RETURN_NONE; } + + Py_INCREF(e); + exception_class = e; + Py_RETURN_NONE; +} + +PyObject* TFE_Py_RegisterResourceVariableType(PyObject* e) { + if (!PyType_Check(e)) { + PyErr_SetString( + PyExc_TypeError, + "TFE_Py_RegisterResourceVariableType: Need to register a type."); + return nullptr; + } + + if (resource_variable_type != nullptr) { + Py_DECREF(resource_variable_type); + } + + Py_INCREF(e); + resource_variable_type = reinterpret_cast(e); + Py_RETURN_NONE; } PyObject* TFE_Py_RegisterFallbackExceptionClass(PyObject* e) { @@ -1375,8 +1416,12 @@ PyObject* GetPythonObjectFromString(const char* s) { #endif } -bool CheckEagerTensors(PyObject* seq, int start_index, - const tensorflow::OpDef& op_def) { +bool CheckResourceVariable(PyObject* item) { + return PyObject_TypeCheck(item, resource_variable_type); +} + +bool 
CheckInputsOk(PyObject* seq, int start_index, + const tensorflow::OpDef& op_def) { for (int i = 0; i < op_def.input_arg_size(); i++) { PyObject* item = PyTuple_GET_ITEM(seq, i + start_index); if (!op_def.input_arg(i).number_attr().empty() || @@ -1384,9 +1429,13 @@ bool CheckEagerTensors(PyObject* seq, int start_index, // This item should be a list input. if (!PyList_Check(item)) return false; for (Py_ssize_t j = 0; j < PyList_Size(item); j++) { - if (!EagerTensor_CheckExact(PyList_GET_ITEM(item, j))) return false; + PyObject* inner_item = PyList_GET_ITEM(item, j); + if (!EagerTensor_CheckExact(inner_item) && + !CheckResourceVariable(inner_item)) { + return false; + } } - } else if (!EagerTensor_CheckExact(item)) { + } else if (!EagerTensor_CheckExact(item) && !CheckResourceVariable(item)) { return false; } } @@ -1394,71 +1443,6 @@ bool CheckEagerTensors(PyObject* seq, int start_index, return true; } -// Adds input and type attr to the op, and to the list of flattened -// inputs/attrs. -bool AddInputToOp(PyObject* input, const tensorflow::OpDef::ArgDef* input_arg, - std::vector* flattened_attrs, - std::vector* flattened_inputs, TFE_Op* op, - TF_Status* status) { - TFE_TensorHandle* input_handle = EagerTensor_Handle(input); - if (input_arg != nullptr && !input_arg->type_attr().empty()) { - auto dtype = TFE_TensorHandleDataType(input_handle); - TFE_OpSetAttrType(op, input_arg->type_attr().data(), dtype); - if (flattened_attrs != nullptr) { - flattened_attrs->push_back( - GetPythonObjectFromString(input_arg->type_attr().data())); - flattened_attrs->push_back(PyLong_FromLong(dtype)); - } - } - - if (flattened_inputs != nullptr) { - flattened_inputs->push_back(input); - } - TFE_OpAddInput(op, input_handle, status); - if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) { - return false; - } - return true; -} - -const tensorflow::OpDef* GetOpDef(PyObject* py_op_name) { - const char* op_name = TFE_GetPythonString(py_op_name); - if (op_name == nullptr) { - 
PyErr_SetString(PyExc_TypeError, - Printf("expected a string for op_name, got %s instead", - py_op_name->ob_type->tp_name) - .c_str()); - return nullptr; - } - - const tensorflow::OpRegistrationData* op_reg_data = nullptr; - const tensorflow::Status lookup_status = - tensorflow::OpRegistry::Global()->LookUp(op_name, &op_reg_data); - if (MaybeRaiseExceptionFromStatus(lookup_status, nullptr)) { - return nullptr; - } - return &op_reg_data->op_def; -} - -const char* GetDeviceName(PyObject* py_device_name) { - if (py_device_name != Py_None) { - return TFE_GetPythonString(py_device_name); - } - return nullptr; -} - -bool RaiseIfNotPyList(PyObject* list, const string& attr_name) { - if (!PyList_Check(list)) { - PyErr_SetString(PyExc_TypeError, - Printf("expected a list for attr %s, got %s instead", - attr_name.data(), list->ob_type->tp_name) - .data()); - - return false; - } - return true; -} - bool OpDoesntRequireOutput(const string& op_name) { static tensorflow::gtl::FlatSet* ops_that_dont_require_outputs = new tensorflow::gtl::FlatSet({ @@ -1583,7 +1567,6 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, break; } } - if (!should_record) Py_RETURN_NONE; string c_op_name = TFE_GetPythonString(op_name); @@ -1617,50 +1600,212 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, Py_RETURN_NONE; } -bool RunCallbacks(bool run_gradient_callback, bool run_post_exec_callbacks, - const tensorflow::OpDef* op_def, PyObject* args, - const std::vector& flattened_inputs, - const std::vector& flattened_attrs, - PyObject* flattened_result, PyObject* op_name, PyObject* name, - PyObject* callbacks) { - tensorflow::Safe_PyObjectPtr inputs = - tensorflow::make_safe(PyTuple_New(flattened_inputs.size())); +void MaybeWatchVariable(PyObject* input) { + DCHECK(CheckResourceVariable(input)); + DCHECK(PyObject_HasAttrString(input, "_trainable")); + + tensorflow::Safe_PyObjectPtr trainable( + PyObject_GetAttrString(input, "_trainable")); 
+ if (trainable.get() == Py_False) return; + TFE_Py_TapeSetWatchVariable(input); +} + +bool ReadVariableOp(const FastPathOpExecInfo& parent_op_exec_info, + PyObject* input, tensorflow::Safe_PyObjectPtr* output, + TF_Status* status) { + MaybeWatchVariable(input); + + TFE_Op* op = TFE_NewOp(parent_op_exec_info.ctx, "ReadVariableOp", status); + auto cleaner = tensorflow::gtl::MakeCleanup([op] { TFE_DeleteOp(op); }); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + // Set dtype + DCHECK(PyObject_HasAttrString(input, "_dtype")); + tensorflow::Safe_PyObjectPtr dtype(PyObject_GetAttrString(input, "_dtype")); + int value; + if (!ParseTypeValue("_dtype", dtype.get(), status, &value)) { + return false; + } + TFE_OpSetAttrType(op, "dtype", static_cast(value)); + + TFE_OpSetDevice(op, parent_op_exec_info.device_name, status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + // Get handle + tensorflow::Safe_PyObjectPtr handle(PyObject_GetAttrString(input, "_handle")); + if (!EagerTensor_CheckExact(handle.get())) return false; + TFE_OpAddInput(op, EagerTensor_Handle(handle.get()), status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + int num_retvals = 1; + TFE_TensorHandle* output_handle; + TFE_Execute(op, &output_handle, &num_retvals, status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + // Always create the py object (and correctly DECREF it) from the returned + // value, else the data will leak. + output->reset(EagerTensorFromHandle(output_handle)); + + // TODO(nareshmodi): Should we run post exec callbacks here? + if (parent_op_exec_info.run_gradient_callback) { + tensorflow::Safe_PyObjectPtr inputs(PyTuple_New(1)); + PyTuple_SET_ITEM(inputs.get(), 0, handle.release()); + + tensorflow::Safe_PyObjectPtr outputs(PyTuple_New(1)); + Py_INCREF(output->get()); // stay alive after since tuple steals. 
+ PyTuple_SET_ITEM(outputs.get(), 0, output->get()); + + if (!RecordGradient(GetPythonObjectFromString("ReadVariableOp"), + inputs.get(), Py_None, outputs.get(), Py_None)) { + return false; + } + } + + return true; +} + +// Supports only 2 cases at the moment: +// i) input is an EagerTensor +// ii) input is a ResourceVariable - in this case, the is_variable param is set +// to true. +bool ConvertToTensor(const FastPathOpExecInfo& op_exec_info, PyObject* input, + tensorflow::Safe_PyObjectPtr* output_handle, + TF_Status* status) { + if (CheckResourceVariable(input)) { + return ReadVariableOp(op_exec_info, input, output_handle, status); + } + + Py_INCREF(input); + output_handle->reset(input); + + return true; +} + +// Adds input and type attr to the op, and to the list of flattened +// inputs/attrs. +bool AddInputToOp(const FastPathOpExecInfo& op_exec_info, PyObject* input, + const tensorflow::OpDef::ArgDef* input_arg, + std::vector* flattened_attrs, + std::vector* flattened_inputs, + TFE_Op* op, TF_Status* status) { + // py_eager_tensor's ownership is transferred to flattened_inputs if it is + // required, else the object is destroyed and DECREF'd when the object goes + // out of scope in this function. 
+ tensorflow::Safe_PyObjectPtr py_eager_tensor = nullptr; + + if (!ConvertToTensor(op_exec_info, input, &py_eager_tensor, status)) { + return false; + } + + TFE_TensorHandle* input_handle = EagerTensor_Handle(py_eager_tensor.get()); + + if (input_arg != nullptr && !input_arg->type_attr().empty()) { + auto dtype = TFE_TensorHandleDataType(input_handle); + TFE_OpSetAttrType(op, input_arg->type_attr().data(), dtype); + if (flattened_attrs != nullptr) { + flattened_attrs->emplace_back( + GetPythonObjectFromString(input_arg->type_attr().data())); + flattened_attrs->emplace_back(PyLong_FromLong(dtype)); + } + } + + if (flattened_inputs != nullptr) { + flattened_inputs->emplace_back(std::move(py_eager_tensor)); + } + + TFE_OpAddInput(op, input_handle, status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) { + return false; + } + + return true; +} + +const tensorflow::OpDef* GetOpDef(PyObject* py_op_name) { + const char* op_name = TFE_GetPythonString(py_op_name); + if (op_name == nullptr) { + PyErr_SetString(PyExc_TypeError, + Printf("expected a string for op_name, got %s instead", + py_op_name->ob_type->tp_name) + .c_str()); + return nullptr; + } + + const tensorflow::OpRegistrationData* op_reg_data = nullptr; + const tensorflow::Status lookup_status = + tensorflow::OpRegistry::Global()->LookUp(op_name, &op_reg_data); + if (MaybeRaiseExceptionFromStatus(lookup_status, nullptr)) { + return nullptr; + } + return &op_reg_data->op_def; +} + +const char* GetDeviceName(PyObject* py_device_name) { + if (py_device_name != Py_None) { + return TFE_GetPythonString(py_device_name); + } + return nullptr; +} + +bool RaiseIfNotPyList(PyObject* list, const string& attr_name) { + if (!PyList_Check(list)) { + PyErr_SetString(PyExc_TypeError, + Printf("expected a list for attr %s, got %s instead", + attr_name.data(), list->ob_type->tp_name) + .data()); + + return false; + } + return true; +} + +bool RunCallbacks( + const FastPathOpExecInfo& op_exec_info, PyObject* args, + const 
std::vector& flattened_inputs, + const std::vector& flattened_attrs, + PyObject* flattened_result) { + if (!op_exec_info.run_callbacks) return true; + + tensorflow::Safe_PyObjectPtr inputs(PyTuple_New(flattened_inputs.size())); for (int i = 0; i < flattened_inputs.size(); i++) { - PyObject* input = flattened_inputs[i]; + PyObject* input = flattened_inputs[i].get(); Py_INCREF(input); PyTuple_SET_ITEM(inputs.get(), i, input); } int num_non_inferred_attrs = PyTuple_GET_SIZE(args) - - op_def->input_arg_size() - + op_exec_info.op_def->input_arg_size() - kFastPathExecuteInputStartIndex; int num_attrs = flattened_attrs.size() + num_non_inferred_attrs; - tensorflow::Safe_PyObjectPtr attrs = - tensorflow::make_safe(PyTuple_New(num_attrs)); + tensorflow::Safe_PyObjectPtr attrs(PyTuple_New(num_attrs)); for (int i = 0; i < num_non_inferred_attrs; i++) { - auto* attr = PyTuple_GET_ITEM( - args, kFastPathExecuteInputStartIndex + op_def->input_arg_size() + i); + auto* attr = + PyTuple_GET_ITEM(args, kFastPathExecuteInputStartIndex + + op_exec_info.op_def->input_arg_size() + i); Py_INCREF(attr); PyTuple_SET_ITEM(attrs.get(), i, attr); } for (int i = num_non_inferred_attrs; i < num_attrs; i++) { - // Not INCREFing anything in flattened_attrs as each of those is a new - // reference, so allow the attrs tuple to steal the reference. 
- PyTuple_SET_ITEM(attrs.get(), i, - flattened_attrs.at(i - num_non_inferred_attrs)); + PyObject* attr_or_name = + flattened_attrs.at(i - num_non_inferred_attrs).get(); + Py_INCREF(attr_or_name); + PyTuple_SET_ITEM(attrs.get(), i, attr_or_name); } - if (run_gradient_callback) { - RecordGradient(op_name, inputs.get(), attrs.get(), flattened_result, name); + if (op_exec_info.run_gradient_callback) { + if (!RecordGradient(op_exec_info.op_name, inputs.get(), attrs.get(), + flattened_result, op_exec_info.name)) { + return false; + } } - if (run_post_exec_callbacks) { - tensorflow::Safe_PyObjectPtr callback_args = tensorflow::make_safe( - Py_BuildValue("OOOOO", op_name, inputs.get(), attrs.get(), - flattened_result, name)); - for (Py_ssize_t i = 0; i < PyList_Size(callbacks); i++) { - PyObject* callback_fn = PyList_GET_ITEM(callbacks, i); + if (op_exec_info.run_post_exec_callbacks) { + tensorflow::Safe_PyObjectPtr callback_args( + Py_BuildValue("OOOOO", op_exec_info.op_name, inputs.get(), attrs.get(), + flattened_result, op_exec_info.name)); + for (Py_ssize_t i = 0; i < PyList_Size(op_exec_info.callbacks); i++) { + PyObject* callback_fn = PyList_GET_ITEM(op_exec_info.callbacks, i); if (!PyCallable_Check(callback_fn)) { PyErr_SetString( PyExc_TypeError, @@ -1695,14 +1840,30 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } - TFE_Context* ctx = reinterpret_cast( + FastPathOpExecInfo op_exec_info; + + op_exec_info.ctx = reinterpret_cast( PyCapsule_GetPointer(PyTuple_GET_ITEM(args, 0), nullptr)); - const char* device_name = GetDeviceName(PyTuple_GET_ITEM(args, 1)); - PyObject* op_name = PyTuple_GET_ITEM(args, 2); - const tensorflow::OpDef* op_def = GetOpDef(op_name); - if (op_def == nullptr) return nullptr; - PyObject* name = PyTuple_GET_ITEM(args, 3); - PyObject* callbacks = PyTuple_GET_ITEM(args, 4); + op_exec_info.device_name = GetDeviceName(PyTuple_GET_ITEM(args, 1)); + op_exec_info.op_name = PyTuple_GET_ITEM(args, 2); + 
op_exec_info.op_def = GetOpDef(op_exec_info.op_name); + if (op_exec_info.op_def == nullptr) return nullptr; + op_exec_info.name = PyTuple_GET_ITEM(args, 3); + op_exec_info.callbacks = PyTuple_GET_ITEM(args, 4); + + const tensorflow::OpDef* op_def = op_exec_info.op_def; + + // TODO(nareshmodi): Add a benchmark for the fast-path with gradient callbacks + // (similar to benchmark_tf_gradient_function_*). Also consider using an + // InlinedVector for flattened_attrs and flattened_inputs if the benchmarks + // point out problems with heap allocs. + op_exec_info.run_gradient_callback = + !*ThreadTapeIsStopped() && !GetTapeSet()->empty(); + op_exec_info.run_post_exec_callbacks = + op_exec_info.callbacks != Py_None && + PyList_Size(op_exec_info.callbacks) > 0; + op_exec_info.run_callbacks = op_exec_info.run_gradient_callback || + op_exec_info.run_post_exec_callbacks; if (args_size < kFastPathExecuteInputStartIndex + op_def->input_arg_size()) { PyErr_SetString( @@ -1715,7 +1876,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } - if (!CheckEagerTensors(args, kFastPathExecuteInputStartIndex, *op_def)) { + if (!CheckInputsOk(args, kFastPathExecuteInputStartIndex, *op_def)) { RaiseFallbackException( "This function does not handle the case of the path where " "all inputs are not already EagerTensors."); @@ -1723,7 +1884,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { } TF_Status* status = TF_NewStatus(); - TFE_Op* op = TFE_NewOp(ctx, op_def->name().c_str(), status); + TFE_Op* op = TFE_NewOp(op_exec_info.ctx, op_def->name().c_str(), status); auto cleaner = tensorflow::gtl::MakeCleanup([status, op] { TF_DeleteStatus(status); TFE_DeleteOp(op); @@ -1750,8 +1911,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { // OpRegistrationData. 
for (const auto& attr : op_def->attr()) { if (attr_name == attr.name()) { - SetOpAttrWithDefaults(ctx, op, attr, attr_name.data(), py_attr_value, - &attr_list_sizes, status); + SetOpAttrWithDefaults(op_exec_info.ctx, op, attr, attr_name.data(), + py_attr_value, &attr_list_sizes, status); if (TF_GetCode(status) != TF_OK) { RaiseFallbackException(TF_Message(status)); @@ -1763,33 +1924,28 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { } } - TFE_OpSetDevice(op, device_name, status); + TFE_OpSetDevice(op, op_exec_info.device_name, status); if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) { return nullptr; } - // TODO(nareshmodi): Add a benchmark for the fast-path with gradient callbacks - // (similar to benchmark_tf_gradient_function_*). Also consider using an - // InlinedVector for flattened_attrs and flattened_inputs if the benchmarks - // point out problems with heap allocs. - bool run_gradient_callback = - !*ThreadTapeIsStopped() && !GetTapeSet()->empty(); - bool run_post_exec_callbacks = - callbacks != Py_None && PyList_Size(callbacks) > 0; - bool run_callbacks = run_gradient_callback || run_post_exec_callbacks; // Flat attrs and inputs as required by the record_gradient call. The attrs // here only contain inferred attrs (non-inferred attrs are added directly // from the input args). - // All items in flattened_attrs contain new references. - // All items in flattened_inputs contain borrowed references. + // All items in flattened_attrs and flattened_inputs contain + // Safe_PyObjectPtr - any time something steals a reference to this, it must + // INCREF. // TODO(nareshmodi): figure out why PyList_New/PyList_Append don't work // directly. 
- std::unique_ptr> flattened_attrs = nullptr; - std::unique_ptr> flattened_inputs = nullptr; + std::unique_ptr> flattened_attrs = + nullptr; + std::unique_ptr> flattened_inputs = + nullptr; - if (run_callbacks) { - flattened_attrs.reset(new std::vector); - flattened_inputs.reset(new std::vector); + // TODO(nareshmodi): Encapsulate callbacks information into a struct. + if (op_exec_info.run_callbacks) { + flattened_attrs.reset(new std::vector); + flattened_inputs.reset(new std::vector); } // Add inferred attrs and inputs. @@ -1809,16 +1965,16 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { Py_ssize_t len = PyList_Size(input); TFE_OpSetAttrInt(op, input_arg.number_attr().data(), len); - if (run_callbacks) { - flattened_attrs->push_back( + if (op_exec_info.run_callbacks) { + flattened_attrs->emplace_back( GetPythonObjectFromString(input_arg.number_attr().data())); - flattened_attrs->push_back(PyLong_FromLong(len)); + flattened_attrs->emplace_back(PyLong_FromLong(len)); } attr_list_sizes[input_arg.number_attr()] = len; if (len > 0) { // First item adds the type attr. - if (!AddInputToOp(PyList_GET_ITEM(input, 0), &input_arg, + if (!AddInputToOp(op_exec_info, PyList_GET_ITEM(input, 0), &input_arg, flattened_attrs.get(), flattened_inputs.get(), op, status)) { return nullptr; @@ -1826,7 +1982,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { for (Py_ssize_t j = 1; j < len; j++) { // Since the list is homogeneous, we don't need to re-add the attr. 
- if (!AddInputToOp(PyList_GET_ITEM(input, j), nullptr /* input_arg */, + if (!AddInputToOp(op_exec_info, PyList_GET_ITEM(input, j), + nullptr /* input_arg */, nullptr /* flattened_attrs */, flattened_inputs.get(), op, status)) { return nullptr; @@ -1840,12 +1997,20 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { Py_ssize_t len = PyList_Size(input); tensorflow::gtl::InlinedVector attr_value(len); PyObject* py_attr_value = nullptr; - if (run_callbacks) { + if (op_exec_info.run_callbacks) { py_attr_value = PyTuple_New(len); } for (Py_ssize_t j = 0; j < len; j++) { PyObject* py_input = PyList_GET_ITEM(input, j); - TFE_TensorHandle* input_handle = EagerTensor_Handle(py_input); + tensorflow::Safe_PyObjectPtr py_eager_tensor; + if (!ConvertToTensor(op_exec_info, py_input, &py_eager_tensor, + status)) { + return nullptr; + } + + TFE_TensorHandle* input_handle = + EagerTensor_Handle(py_eager_tensor.get()); + attr_value[j] = TFE_TensorHandleDataType(input_handle); TFE_OpAddInput(op, input_handle, status); @@ -1853,22 +2018,23 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } - if (run_callbacks) { - flattened_inputs->push_back(py_input); + if (op_exec_info.run_callbacks) { + flattened_inputs->emplace_back(std::move(py_eager_tensor)); PyTuple_SET_ITEM(py_attr_value, j, PyLong_FromLong(attr_value[j])); } } - if (run_callbacks) { - flattened_attrs->push_back(GetPythonObjectFromString(attr_name.data())); - flattened_attrs->push_back(py_attr_value); + if (op_exec_info.run_callbacks) { + flattened_attrs->emplace_back( + GetPythonObjectFromString(attr_name.data())); + flattened_attrs->emplace_back(py_attr_value); } TFE_OpSetAttrTypeList(op, attr_name.data(), attr_value.data(), attr_value.size()); attr_list_sizes[attr_name] = len; } else { // The item is a single item. 
- if (!AddInputToOp(input, &input_arg, flattened_attrs.get(), + if (!AddInputToOp(op_exec_info, input, &input_arg, flattened_attrs.get(), flattened_inputs.get(), op, status)) { return nullptr; } @@ -1892,12 +2058,14 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { Py_BEGIN_ALLOW_THREADS; TFE_Execute(op, retvals.data(), &num_retvals, status); Py_END_ALLOW_THREADS; + if (TF_GetCode(status) != TF_OK) { // Augment the status with the op_name for easier debugging similar to // TFE_Py_Execute. TF_SetStatus(status, TF_GetCode(status), - tensorflow::strings::StrCat(TF_Message(status), " [Op:", - TFE_GetPythonString(op_name), "]") + tensorflow::strings::StrCat( + TF_Message(status), + " [Op:", TFE_GetPythonString(op_exec_info.op_name), "]") .c_str()); MaybeRaiseExceptionFromTFStatus(status, nullptr); @@ -1909,10 +2077,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { PyList_SET_ITEM(flat_result, i, EagerTensorFromHandle(retvals[i])); } - if (run_callbacks && - !RunCallbacks(run_gradient_callback, run_post_exec_callbacks, op_def, - args, *flattened_inputs, *flattened_attrs, flat_result, - op_name, name, callbacks)) { + if (!RunCallbacks(op_exec_info, args, *flattened_inputs, *flattened_attrs, + flat_result)) { return nullptr; } diff --git a/tensorflow/python/eager/pywrap_tfe_test.py b/tensorflow/python/eager/pywrap_tfe_test.py index 418ed75178..46c5601f47 100644 --- a/tensorflow/python/eager/pywrap_tfe_test.py +++ b/tensorflow/python/eager/pywrap_tfe_test.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import resource_variable_ops class Tests(test.TestCase): @@ -53,6 +54,21 @@ class Tests(test.TestCase): ctx._handle, ctx.device_name, "MatMul", None, None, a_100_by_784, b_100_by_784, "transpose_a", False, "transpose_b", True)) + 
@test_util.assert_no_new_tensors + @test_util.assert_no_garbage_created + def testFastpathExecute_ResourceVariableMatMulCorrectResponse(self): + ctx = context.context() + a_2_by_2 = constant_op.constant(1.0, shape=[2, 2]) + m = resource_variable_ops.ResourceVariable(a_2_by_2) + x = pywrap_tensorflow.TFE_Py_FastPathExecute( + ctx._handle, ctx.device_name, "MatMul", None, None, m, m, "transpose_a", + False, "transpose_b", False) + y = pywrap_tensorflow.TFE_Py_FastPathExecute( + ctx._handle, ctx.device_name, "MatMul", None, None, a_2_by_2, a_2_by_2, + "transpose_a", False, "transpose_b", False) + + self.assertAllEqual(x, y) + @test_util.assert_no_new_tensors @test_util.assert_no_garbage_created def testFastpathExecute_TapeWrite(self): @@ -67,6 +83,21 @@ class Tests(test.TestCase): self.assertAllEqual(dz_dy.numpy(), constant_op.constant(4.0, shape=[2, 2]).numpy()) + @test_util.assert_no_new_tensors + @test_util.assert_no_garbage_created + def testFastpathExecute_ResourceVariableTapeWrite(self): + ctx = context.context() + with backprop.GradientTape(persistent=True) as tape: + a_2_by_2 = constant_op.constant(1.0, shape=[2, 2]) + m = resource_variable_ops.ResourceVariable(a_2_by_2) + tape.watch(m) + z = pywrap_tensorflow.TFE_Py_FastPathExecute( + ctx._handle, ctx.device_name, "MatMul", None, None, m, m, + "transpose_a", False, "transpose_b", False) + dz_dy = tape.gradient(z, [m])[0] + self.assertAllEqual(dz_dy.numpy(), + constant_op.constant(4.0, shape=[2, 2]).numpy()) + # Tests homogeneous list op @test_util.assert_no_new_tensors @test_util.assert_no_garbage_created diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index cbac3c686d..6c5d692e82 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import variable_pb2 
+from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import tape from tensorflow.python.framework import dtypes @@ -932,6 +933,9 @@ class ResourceVariable(variables.Variable): "Tensor object.") +pywrap_tensorflow.TFE_Py_RegisterResourceVariableType(ResourceVariable) + + def _dense_var_to_tensor(var, dtype=None, name=None, as_ref=False): return var._dense_var_to_tensor(dtype=dtype, name=name, as_ref=as_ref) # pylint: disable=protected-access diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 7ab0db5268..b481ddf5d4 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -31,6 +31,7 @@ limitations under the License. %rename("%s") TFE_Py_RegisterExceptionClass; %rename("%s") TFE_Py_RegisterBackwardFunctionGetter; %rename("%s") TFE_Py_RegisterFallbackExceptionClass; +%rename("%s") TFE_Py_RegisterResourceVariableType; %rename("%s") TFE_Py_Execute; %rename("%s") TFE_Py_FastPathExecute; %rename("%s") TFE_Py_RecordGradient; -- GitLab From 9a45b6bdf0246477754f50fab357e568051bed4f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 15:24:33 -0800 Subject: [PATCH 280/311] Adds setUseNNAPI to Interpreter.java, to enable develoeprs turn on & off NNAPI. 
PiperOrigin-RevId: 187677765 --- .../java/org/tensorflow/lite/Interpreter.java | 10 +++- .../org/tensorflow/lite/InterpreterTest.java | 48 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index b071cda5df..9e47e921a6 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -167,7 +167,6 @@ public final class Interpreter implements AutoCloseable { return wrapper.getOutputIndex(opName); } - /** * Returns native inference timing. *

IllegalArgumentException will be thrown if the model is not initialized by the @@ -180,6 +179,15 @@ public final class Interpreter implements AutoCloseable { return wrapper.getLastNativeInferenceDurationNanoseconds(); } + /** Turns on/off Android NNAPI for hardware acceleration when it is available. */ + public void setUseNNAPI(boolean useNNAPI) { + if (wrapper != null) { + wrapper.setUseNNAPI(useNNAPI); + } else { + throw new IllegalStateException("NativeInterpreterWrapper has already been closed."); + } + } + /** Release resources associated with the {@code Interpreter}. */ @Override public void close() { diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java index 424b3de6c9..61d6c35ec8 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java @@ -218,4 +218,52 @@ public final class InterpreterTest { int index = interpreter.getOutputIndex("MobilenetV1/Predictions/Softmax"); assertThat(index).isEqualTo(0); } + + @Test + public void testTurnOffNNAPI() throws Exception { + Path path = MODEL_FILE.toPath(); + FileChannel fileChannel = + (FileChannel) Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)); + MappedByteBuffer mappedByteBuffer = + fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()); + Interpreter interpreter = new Interpreter(mappedByteBuffer); + interpreter.setUseNNAPI(true); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + float[][][][] parsedOutputs = new float[2][8][8][3]; + interpreter.run(fourD, parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, 19.62f, 
23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + interpreter.setUseNNAPI(false); + interpreter.run(fourD, parsedOutputs); + outputOneD = parsedOutputs[0][0][0]; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + interpreter.close(); + fileChannel.close(); + } + + @Test + public void testTurnOnNNAPI() throws Exception { + Path path = MODEL_FILE.toPath(); + FileChannel fileChannel = + (FileChannel) Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)); + MappedByteBuffer mappedByteBuffer = + fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()); + Interpreter interpreter = new Interpreter(mappedByteBuffer); + interpreter.setUseNNAPI(true); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + float[][][][] parsedOutputs = new float[2][8][8][3]; + interpreter.run(fourD, parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, 19.62f, 23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + interpreter.close(); + fileChannel.close(); + } } -- GitLab From e8e4e5b99b721dcd79e0d1a9a7fe6bfb990744ba Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 15:25:41 -0800 Subject: [PATCH 281/311] Fix some compiler warnings in MKL build. 
PiperOrigin-RevId: 187677893 --- tensorflow/core/BUILD | 6 ++- .../core/common_runtime/mkl_cpu_allocator.cc | 27 ++++++++++ .../core/common_runtime/mkl_cpu_allocator.h | 4 +- tensorflow/core/graph/mkl_graph_util.h | 4 +- tensorflow/core/graph/mkl_layout_pass.cc | 13 ++--- tensorflow/core/kernels/BUILD | 50 +++++++++---------- tensorflow/core/kernels/mkl_concat_op.cc | 12 +++-- .../core/kernels/mkl_conv_grad_bias_ops.cc | 5 +- tensorflow/core/kernels/mkl_conv_ops.cc | 6 ++- .../core/kernels/mkl_fused_batch_norm_op.cc | 1 - tensorflow/core/kernels/mkl_lrn_op.cc | 9 ++-- tensorflow/core/kernels/mkl_maxpooling_op.cc | 7 +-- tensorflow/core/kernels/mkl_relu_op.cc | 1 - tensorflow/core/ops/nn_ops.cc | 8 +-- 14 files changed, 93 insertions(+), 60 deletions(-) create mode 100644 tensorflow/core/common_runtime/mkl_cpu_allocator.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 3271825251..3a436ff680 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1920,7 +1920,7 @@ tf_cuda_library( ) + if_mkl( [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", + "@mkl_dnn", ], ), alwayslink = 1, @@ -2135,6 +2135,7 @@ tf_cuda_library( "common_runtime/graph_runner.cc", "common_runtime/local_device.cc", "common_runtime/memory_types.cc", + "common_runtime/mkl_cpu_allocator.cc", "common_runtime/optimization_registry.cc", "common_runtime/parallel_concat_optimizer.cc", "common_runtime/placer.cc", @@ -2174,6 +2175,7 @@ tf_cuda_library( ] + if_mkl( [ "//third_party/mkl:intel_binary_blob", + "@mkl_dnn", ], ), alwayslink = 1, @@ -2218,7 +2220,7 @@ tf_cuda_library( ] + if_mkl( [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", + "@mkl_dnn", ], ) + tf_additional_core_deps() + if_static([":core_cpu_impl"]), alwayslink = 1, diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc new file mode 100644 index 0000000000..43a909466e --- /dev/null +++ 
b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc @@ -0,0 +1,27 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifdef INTEL_MKL + +#include "tensorflow/core/common_runtime/mkl_cpu_allocator.h" + +namespace tensorflow { + +constexpr const char* MklCPUAllocator::kMaxLimitStr; +constexpr const size_t MklCPUAllocator::kDefaultMaxLimit; + +} // namespace tensorflow + +#endif // INTEL_MKL diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index fb092424bf..55c8411ad0 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -53,7 +53,7 @@ class MklCPUAllocator : public VisitableAllocator { static constexpr const char* kMaxLimitStr = "TF_MKL_ALLOC_MAX_BYTES"; /// Default upper limit on allocator size - 64GB - static const size_t kDefaultMaxLimit = 64LL << 30; + static constexpr size_t kDefaultMaxLimit = 64LL << 30; MklCPUAllocator() { TF_CHECK_OK(Initialize()); } @@ -158,7 +158,7 @@ class MklCPUAllocator : public VisitableAllocator { static constexpr const char* kName = "mklcpu"; /// The alignment that we need for the allocations - static const size_t kAlignment = 64; + static constexpr const size_t kAlignment = 64; VisitableAllocator* allocator_; // owned by this class }; diff --git 
a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h index 1b99d54e8e..5f51d6083b 100644 --- a/tensorflow/core/graph/mkl_graph_util.h +++ b/tensorflow/core/graph/mkl_graph_util.h @@ -90,7 +90,7 @@ inline string GetMklOpName(const string& name) { // @input: name of the op // @input: T datatype to be used for checking op // @return: true if opname is registered as Mkl op; false otherwise -static inline bool IsMklOp(const std::string& op_name, DataType T) { +static inline bool IsMklOp(const string& op_name, DataType T) { string kernel = KernelsRegisteredForOp(op_name); bool result = kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT); @@ -104,7 +104,7 @@ static inline bool IsMklOp(const std::string& op_name, DataType T) { // @input: T datatype to be used for checking op // @return: true if opname is registered as element-wise Mkl op; // false otherwise -static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) { +static inline bool IsMklElementWiseOp(const string& op_name, DataType T) { if (!IsMklOp(op_name, T)) { return false; } diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 7d3be15299..02038c5d77 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// TODO(intel): Improve error handling in this file; instead of CHECK failing +// all over the place, we should log an error and execute the original graph. 
#ifdef INTEL_MKL #include @@ -1030,8 +1032,7 @@ void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr* g, TensorProto proto; proto.set_dtype(dt); uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0}; - proto.set_tensor_content(const_cast(static_cast(&zero)), - 8); + proto.set_tensor_content(string(reinterpret_cast(zero), 8)); TensorShape dummy_shape({8}); dummy_shape.AsProto(proto.mutable_tensor_shape()); TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const") @@ -1144,7 +1145,8 @@ int MklLayoutRewritePass::SetUpContiguousInputs( // For that let's first find filter node that is 2nd input (slot 1) // of BackpropInput. Node* filter_node = nullptr; - old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, &filter_node); + TF_CHECK_OK(old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, + &filter_node)); CHECK_NOTNULL(filter_node); // Now check which nodes receive from filter_node. Filter feeds as @@ -1323,8 +1325,7 @@ void MklLayoutRewritePass::GetDummyWorkspaceTensorNode( TensorProto proto; proto.set_dtype(dt); float zero[1] = {0}; - proto.set_tensor_content(const_cast(static_cast(&zero)), - 4); + proto.set_tensor_content(string(reinterpret_cast(&zero), 4)); TensorShape dummy_shape({1}); dummy_shape.AsProto(proto.mutable_tensor_shape()); TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const") @@ -1829,7 +1830,7 @@ Status MklLayoutRewritePass::MergeNode(std::unique_ptr* g, Node* succ, // Create node. Node* new_node; - nb.Finalize(&**g, &new_node); + TF_CHECK_OK(nb.Finalize(&**g, &new_node)); CHECK_NOTNULL(new_node); // Set the Mkl layer label for this op. 
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index feacee5d63..52be90ea1f 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -879,7 +879,7 @@ tf_kernel_library( hdrs = ["transpose_op.h"], deps = ARRAY_DEPS + if_mkl([ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", + "@mkl_dnn", ]), ) @@ -2810,7 +2810,7 @@ tf_kernel_library( "//conditions:default": [], }) + if_mkl([ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", + "@mkl_dnn", ]) + if_cuda([ "//tensorflow/core/platform/default/build_config:cublas_plugin", ]), @@ -5850,10 +5850,9 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", - ] + if_mkl([ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( @@ -5867,10 +5866,9 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", - ] + if_mkl([ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( @@ -5898,6 +5896,7 @@ tf_mkl_kernel_library( ], hdrs = ["mkl_pooling_ops_common.h"], deps = [ + ":bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -5919,10 +5918,10 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", - ] + if_mkl([ + "//third_party/eigen3", "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( @@ -5936,19 +5935,18 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", - ] + if_mkl([ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( name = "mkl_fused_batch_norm_op", srcs = ["mkl_fused_batch_norm_op.cc"], - deps = NN_DEPS + if_mkl([ + deps 
= NN_DEPS + [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( @@ -5962,10 +5960,10 @@ tf_mkl_kernel_library( tf_mkl_kernel_library( name = "mkl_concat_op", prefix = "mkl_concat_op", - deps = ARRAY_DEPS + if_mkl([ + deps = ARRAY_DEPS + [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( @@ -5979,19 +5977,19 @@ tf_mkl_kernel_library( tf_mkl_kernel_library( name = "mkl_identity_op", prefix = "mkl_identity_op", - deps = ARRAY_DEPS + if_mkl([ + deps = ARRAY_DEPS + [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( name = "mkl_lrn_op", prefix = "mkl_lrn_op", - deps = NN_DEPS + if_mkl([ + deps = NN_DEPS + [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index f1f267e849..aa3ea890b0 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -519,9 +519,11 @@ class MklConcatOp : public OpKernel { mkl_tensor_tf_shape.AddDim( SIZE_OF_MKL_SERIAL_DATA(mkl_tensor_mkl_shape.GetDimension())); int tf_output_index = 0; - context->allocate_output( + // TODO(jktomer): replace this with OP_REQUIRES_OK and clean up this file + // to propagate the status up the call stack. 
+ TF_CHECK_OK(context->allocate_output( GetTensorMetaDataIndex(tf_output_index, context->num_outputs()), - mkl_tensor_tf_shape, &mkl_tensor); + mkl_tensor_tf_shape, &mkl_tensor)); mkl_tensor_mkl_shape.SerializeMklShape( mkl_tensor->flat().data(), mkl_tensor->flat().size() * sizeof(uint8)); @@ -549,9 +551,11 @@ class MklConcatOp : public OpKernel { mkl_tensor_tf_shape.AddDim( SIZE_OF_MKL_SERIAL_DATA(mkl_tensor_mkl_shape.GetDimension())); int tf_output_index = 0; - context->allocate_output( + // TODO(jktomer): replace this with OP_REQUIRES_OK and clean up this file + // to propagate the status up the call stack. + TF_CHECK_OK(context->allocate_output( GetTensorMetaDataIndex(tf_output_index, context->num_outputs()), - mkl_tensor_tf_shape, &mkl_tensor); + mkl_tensor_tf_shape, &mkl_tensor)); mkl_tensor_mkl_shape.SerializeMklShape( mkl_tensor->flat().data(), mkl_tensor->flat().size() * sizeof(uint8)); diff --git a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc index 25c2573741..d23027a54d 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc @@ -79,8 +79,9 @@ class MklConv2DCustomBackpropBiasOp : public OpKernel { } else if (data_format_ == FORMAT_NHWC || data_format_ == FORMAT_NCHW) { mkl_context.c_size = GetTensorDim(input, data_format_, 'C'); } else { - errors::InvalidArgument("Unknown format ", - " Format must be either NCHW or NHWC. "); + context->CtxFailure(errors::InvalidArgument( + "Unknown format ", " Format must be either NCHW or NHWC. 
")); + return; } TensorShape output_shape{mkl_context.c_size}; diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 2953426d58..1440da8f82 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -294,8 +294,10 @@ class MklConv2DOp : public OpKernel { mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd, dnnResourceFilter); - size_t filter_sizes[4] = {filter.dim_size(0), filter.dim_size(1), - filter.dim_size(2), filter.dim_size(3)}; + size_t filter_sizes[4] = {static_cast(filter.dim_size(0)), + static_cast(filter.dim_size(1)), + static_cast(filter.dim_size(2)), + static_cast(filter.dim_size(3))}; mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes, mkl_context.filter_strides); diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 8313224d7f..9b2146aca3 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -262,7 +262,6 @@ class MklFusedBatchNormOp : public OpKernel { } void MklCreateInputLayout(OpKernelContext* context) { - const Tensor& input = MklGetInput(context, 0); bool input_in_mkl_format = mkl_shape_input_shape.IsMklTensor(); if (input_in_mkl_format) { mkl_lt_input = diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc index 5f0a12a1fb..282012c719 100644 --- a/tensorflow/core/kernels/mkl_lrn_op.cc +++ b/tensorflow/core/kernels/mkl_lrn_op.cc @@ -88,7 +88,8 @@ class MklLRNOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_)); OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_)); workspace_enabled_ = false; - context->GetAttr("workspace_enabled", &workspace_enabled_); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_enabled", &workspace_enabled_)); } void Compute(OpKernelContext* context) override { @@ -357,7 +358,8 @@ class 
MklLRNGradOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_)); OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_)); workspace_enabled_ = false; - context->GetAttr("workspace_enabled", &workspace_enabled_); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_enabled", &workspace_enabled_)); } void Compute(OpKernelContext* context) override { @@ -535,7 +537,6 @@ class MklLRNGradOp : public OpKernel { Tensor* mkl_tmp_outimage_buf_tensor) { const Tensor& in_grads = MklGetInput(context, 0); const Tensor& in_image = MklGetInput(context, 1); - const Tensor& out_image = MklGetInput(context, 2); const Tensor& workspace = MklGetInput( context, 3); /*Worskpsace is enabled, get the buffer to the workspace */ @@ -544,8 +545,6 @@ class MklLRNGradOp : public OpKernel { static_cast(in_grads.flat().data())); void* user_fwd_input = const_cast( static_cast(in_image.flat().data())); - void* user_fwd_output = const_cast( - static_cast(out_image.flat().data())); void* workspace_buffer = const_cast( static_cast(workspace.flat().data())); diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc index 14607f26e0..ea537524b1 100644 --- a/tensorflow/core/kernels/mkl_maxpooling_op.cc +++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc @@ -69,7 +69,8 @@ class MklMaxPoolingOp : public OpKernel { // We may not get this attribute for this node if it does not go through // graph rewrite pass. So we do not check for error while retrieving this // attribute value. 
- context->GetAttr("workspace_enabled", &workspace_enabled_); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_enabled", &workspace_enabled_)); } void Compute(OpKernelContext* context) override { @@ -118,7 +119,6 @@ class MklMaxPoolingOp : public OpKernel { mkl_out_shape); Tensor* workspace_tensor; - void* workspace_buf = nullptr; TensorShape workspace_shape; mkl_workspace_shape.SetMklTensor(false); @@ -226,7 +226,8 @@ class MklMaxPoolingGradOp : public OpKernel { // We may not get this attribute for this node if it does not go through // graph rewrite pass. So we do not check for error while retrieving this // attribute value. - context->GetAttr("workspace_enabled", &workspace_enabled_); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_enabled", &workspace_enabled_)); } void Compute(OpKernelContext* context) override { diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 51db3991e2..f006954c03 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -25,7 +25,6 @@ limitations under the License. #include "mkl_dnn.h" #include "mkl_dnn_types.h" -#include "tensorflow/core/platform/default/logging.h" #include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 67481fd202..910fbaca9e 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -2007,10 +2007,10 @@ REGISTER_OP("_MklFusedBatchNorm") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &x)); bool is_training; - c->GetAttr("is_training", &is_training); + TF_RETURN_IF_ERROR(c->GetAttr("is_training", &is_training)); int number_inputs = (is_training) ? 3 : 5; string data_format; - c->GetAttr("data_format", &data_format); + TF_RETURN_IF_ERROR(c->GetAttr("data_format", &data_format)); DimensionHandle channel_dim = (data_format == "NHWC") ? 
c->Dim(x, 3) : c->Dim(x, 1); @@ -2076,8 +2076,8 @@ REGISTER_OP("_MklFusedBatchNormGrad") bool is_training; string data_format; - c->GetAttr("is_training", &is_training); - c->GetAttr("data_format", &data_format); + TF_RETURN_IF_ERROR(c->GetAttr("is_training", &is_training)); + TF_RETURN_IF_ERROR(c->GetAttr("data_format", &data_format)); DimensionHandle channel_dim = (data_format == "NHWC") ? c->Dim(y_backprop, 3) : c->Dim(y_backprop, 1); -- GitLab From 737d2e73c82abe35ae76bd7d17793243f3dc9dd5 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Fri, 2 Mar 2018 15:52:32 -0800 Subject: [PATCH 282/311] Exit with failure if a free gpu is not found by parallel_gpu_execute. If TF_GPU_COUNT was a value greater than the actual number of GPUs, it was possible for tests to just pass without running when running under parallel_gpu_execute.sh. PiperOrigin-RevId: 187681032 --- tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh index cfeaebdbf5..d0816c92b7 100755 --- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh +++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh @@ -54,3 +54,6 @@ for i in `seq 0 $((TF_GPU_COUNT-1))`; do fi done +echo "Cannot find a free GPU to run the test $* on, exiting with failure..." +exit 1 + -- GitLab From c12f0c5f84699835f9b8111299febf9fc7aba343 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 2 Mar 2018 16:05:05 -0800 Subject: [PATCH 283/311] eager/examples: Use tf.keras.Model in RNN examples. Some notable differences between tf.keras.Model and tfe.Network: - tf.keras.Model doesn't have a track_layer() method. It tracks Layer and Checkpointable valued attributes automatically. For list and other complex structures, __setattr__ performs the role of tfe.Network.track_layer() - tf.keras.Model accepts a single positional argument. 
Thus either all arguments must be packaged into a single list/tuple (as in rnn_ptb.py) or be provided as keyword arguments (as in rnn_colorbot.py). PiperOrigin-RevId: 187682716 --- .../examples/rnn_colorbot/rnn_colorbot.py | 40 ++++++++++-------- .../eager/python/examples/rnn_ptb/rnn_ptb.py | 42 ++++++++++++------- 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py index aa87b94e7b..29f0232454 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py +++ b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py @@ -109,7 +109,7 @@ def load_dataset(data_dir, url, batch_size): # pylint: disable=not-callable -class RNNColorbot(tfe.Network): +class RNNColorbot(tf.keras.Model): """Multi-layer (LSTM) RNN that regresses on real-valued vector labels. """ @@ -127,23 +127,20 @@ class RNNColorbot(tfe.Network): self.label_dimension = label_dimension self.keep_prob = keep_prob - # Note the calls to `track_layer` below; these calls register the layers as - # network components that house trainable variables. - self.cells = [ - self.track_layer(tf.nn.rnn_cell.BasicLSTMCell(size)) - for size in rnn_cell_sizes - ] - self.relu = self.track_layer( - tf.layers.Dense(label_dimension, activation=tf.nn.relu, name="relu")) + self.cells = self._add_cells( + [tf.nn.rnn_cell.BasicLSTMCell(size) for size in rnn_cell_sizes]) + self.relu = tf.layers.Dense( + label_dimension, activation=tf.nn.relu, name="relu") - def call(self, chars, sequence_length, training=False): + def call(self, inputs, training=False): """Implements the RNN logic and prediction generation. 
Args: - chars: a Tensor of dimension [batch_size, time_steps, 256] holding a - batch of one-hot encoded color names - sequence_length: a Tensor of dimension [batch_size] holding the length - of each character sequence (i.e., color name) + inputs: A tuple (chars, sequence_length), where chars is a batch of + one-hot encoded color names represented as a Tensor with dimensions + [batch_size, time_steps, 256] and sequence_length holds the length + of each character sequence (color name) as a Tensor with dimension + [batch_size]. training: whether the invocation is happening during training Returns: @@ -151,6 +148,7 @@ class RNNColorbot(tfe.Network): passing chars through a multi-layer RNN and applying a ReLU to the final hidden state. """ + (chars, sequence_length) = inputs # Transpose the first and second dimensions so that chars is of shape # [time_steps, batch_size, dimension]. chars = tf.transpose(chars, [1, 0, 2]) @@ -181,6 +179,14 @@ class RNNColorbot(tfe.Network): hidden_states = tf.gather_nd(chars, indices) return self.relu(hidden_states) + def _add_cells(self, cells): + # "Magic" required for keras.Model classes to track all the variables in + # a list of tf.layers.Layer objects. + # TODO(ashankar): Figure out API so user code doesn't have to do this. 
+ for i, c in enumerate(cells): + setattr(self, "cell-%d" % i, c) + return cells + def loss(labels, predictions): """Computes mean squared loss.""" @@ -191,7 +197,7 @@ def test(model, eval_data): """Computes the average loss on eval_data, which should be a Dataset.""" avg_loss = tfe.metrics.Mean("loss") for (labels, chars, sequence_length) in tfe.Iterator(eval_data): - predictions = model(chars, sequence_length, training=False) + predictions = model((chars, sequence_length), training=False) avg_loss(loss(labels, predictions)) print("eval/loss: %.6f\n" % avg_loss.result()) with tf.contrib.summary.always_record_summaries(): @@ -204,7 +210,7 @@ def train_one_epoch(model, optimizer, train_data, log_interval=10): tf.train.get_or_create_global_step() def model_loss(labels, chars, sequence_length): - predictions = model(chars, sequence_length, training=True) + predictions = model((chars, sequence_length), training=True) loss_value = loss(labels, predictions) tf.contrib.summary.scalar("loss", loss_value) return loss_value @@ -277,7 +283,7 @@ def main(_): (chars, length) = (tf.identity(chars), tf.identity(length)) chars = tf.expand_dims(chars, 0) length = tf.expand_dims(length, 0) - preds = tf.unstack(model(chars, length, training=False)[0]) + preds = tf.unstack(model((chars, length), training=False)[0]) # Predictions cannot be negative, as they are generated by a ReLU layer; # they may, however, be greater than 1. diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py index 5c5c59c877..69cd16d12c 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py +++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py @@ -39,21 +39,23 @@ from tensorflow.contrib.cudnn_rnn.python.layers import cudnn_rnn from tensorflow.contrib.eager.python import tfe -class RNN(tfe.Network): +class RNN(tf.keras.Model): """A static RNN. - Similar to tf.nn.static_rnn, implemented as a tf.layer.Layer. 
+ Similar to tf.nn.static_rnn, implemented as a class. """ def __init__(self, hidden_dim, num_layers, keep_ratio): super(RNN, self).__init__() self.keep_ratio = keep_ratio - for _ in range(num_layers): - self.track_layer(tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)) + self.cells = self._add_cells([ + tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim) + for _ in range(num_layers) + ]) def call(self, input_seq, training): batch_size = int(input_seq.shape[1]) - for c in self.layers: + for c in self.cells: state = c.zero_state(batch_size, tf.float32) outputs = [] input_seq = tf.unstack(input_seq, num=int(input_seq.shape[0]), axis=0) @@ -64,7 +66,19 @@ class RNN(tfe.Network): input_seq = tf.stack(outputs, axis=0) if training: input_seq = tf.nn.dropout(input_seq, self.keep_ratio) - return input_seq, None + # Returning a list instead of a single tensor so that the line: + # y = self.rnn(y, ...)[0] + # in PTBModel.call works for both this RNN and CudnnLSTM (which returns a + # tuple (output, output_states). + return [input_seq] + + def _add_cells(self, cells): + # "Magic" required for keras.Model classes to track all the variables in + # a list of tf.layers.Layer objects. + # TODO(ashankar): Figure out API so user code doesn't have to do this. + for i, c in enumerate(cells): + setattr(self, "cell-%d" % i, c) + return cells class Embedding(tf.layers.Layer): @@ -87,7 +101,8 @@ class Embedding(tf.layers.Layer): return tf.nn.embedding_lookup(self.embedding, x) -class PTBModel(tfe.Network): +# pylint: disable=not-callable +class PTBModel(tf.keras.Model): """LSTM for word language modeling. 
Model described in: @@ -109,19 +124,16 @@ class PTBModel(tfe.Network): self.keep_ratio = 1 - dropout_ratio self.use_cudnn_rnn = use_cudnn_rnn - self.embedding = self.track_layer(Embedding(vocab_size, embedding_dim)) + self.embedding = Embedding(vocab_size, embedding_dim) if self.use_cudnn_rnn: self.rnn = cudnn_rnn.CudnnLSTM( num_layers, hidden_dim, dropout=dropout_ratio) else: self.rnn = RNN(hidden_dim, num_layers, self.keep_ratio) - self.track_layer(self.rnn) - self.linear = self.track_layer( - tf.layers.Dense( - vocab_size, - kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))) + self.linear = tf.layers.Dense( + vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1)) self._output_shape = [-1, embedding_dim] def call(self, input_seq, training): @@ -136,7 +148,7 @@ class PTBModel(tfe.Network): y = self.embedding(input_seq) if training: y = tf.nn.dropout(y, self.keep_ratio) - y, _ = self.rnn(y, training=training) + y = self.rnn(y, training=training)[0] return self.linear(tf.reshape(y, self._output_shape)) @@ -148,7 +160,7 @@ def clip_gradients(grads_and_vars, clip_ratio): def loss_fn(model, inputs, targets, training): labels = tf.reshape(targets, [-1]) - outputs = model(inputs, training) + outputs = model(inputs, training=training) return tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels, logits=outputs)) -- GitLab From 284dac189dcae46c77f1ec70055b13e69c31e4c0 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 2 Mar 2018 16:06:24 -0800 Subject: [PATCH 284/311] Checkpointable: Fix CPU/GPU device placement issues Restore ops go on the CPU, then the value gets copied to whichever device it needs to be on. This I need to do manually for restores passed as initial_values; for regular save/restore it's done by the SaveableObjects for variables. Also explicitly places some counters on the CPU. Adds a GPU-using test for Checkpointable usage. 
PiperOrigin-RevId: 187683050 --- tensorflow/contrib/eager/python/BUILD | 7 ++- .../eager/python/checkpointable_utils.py | 50 +++++++++++-------- .../eager/python/checkpointable_utils_test.py | 2 +- tensorflow/python/BUILD | 1 + tensorflow/python/training/checkpointable.py | 18 ++++--- 5 files changed, 45 insertions(+), 33 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 6fb8287030..7fde53476d 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -243,13 +243,13 @@ py_library( ], ) -py_test( +cuda_py_test( name = "checkpointable_utils_test", srcs = ["checkpointable_utils_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":checkpointable_utils", ":network", + "@six_archive//:six", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", @@ -265,7 +265,6 @@ py_test( "//tensorflow/python/eager:context", "//tensorflow/python/eager:test", "//tensorflow/python/keras", - "@six_archive//:six", ], ) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index 89cd543f77..cd742991af 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -549,7 +549,8 @@ class CheckpointableSaver(object): # `Checkpointable` objects save themselves. 
self._root_checkpointable_ref = root_checkpointable if context.in_graph_mode(): - self._file_prefix_placeholder = constant_op.constant("model") + with ops.device("/cpu:0"): + self._file_prefix_placeholder = constant_op.constant("model") else: self._file_prefix_placeholder = None @@ -601,14 +602,16 @@ class CheckpointableSaver(object): if session is None: session = ops.get_default_session() if self._object_graph_feed_tensor is None: - self._object_graph_feed_tensor = constant_op.constant( - "", dtype=dtypes.string) + with ops.device("/cpu:0"): + self._object_graph_feed_tensor = constant_op.constant( + "", dtype=dtypes.string) object_graph_tensor = self._object_graph_feed_tensor feed_additions = {object_graph_tensor: graph_proto.SerializeToString()} else: session = None - object_graph_tensor = constant_op.constant( - graph_proto.SerializeToString(), dtype=dtypes.string) + with ops.device("/cpu:0"): + object_graph_tensor = constant_op.constant( + graph_proto.SerializeToString(), dtype=dtypes.string) feed_additions = None assert _OBJECT_GRAPH_PROTO_KEY not in named_variables named_variables[_OBJECT_GRAPH_PROTO_KEY] = _NoRestoreSaveable( @@ -627,12 +630,13 @@ class CheckpointableSaver(object): self._last_save_object_graph = graph_proto else: saver = self._last_save_saver - save_path = saver.save( - sess=_SessionWithFeedDictAdditions( - session=session, feed_additions=feed_additions), - save_path=file_prefix, - write_meta_graph=False, - global_step=checkpoint_number) + with ops.device("/cpu:0"): + save_path = saver.save( + sess=_SessionWithFeedDictAdditions( + session=session, feed_additions=feed_additions), + save_path=file_prefix, + write_meta_graph=False, + global_step=checkpoint_number) return save_path def _global_variable_names(self): @@ -718,16 +722,18 @@ class CheckpointableSaver(object): file_prefix_feed_dict = {self._file_prefix_placeholder: save_path} else: session = None - file_prefix_tensor = constant_op.constant(save_path) + with ops.device("/cpu:0"): + 
file_prefix_tensor = constant_op.constant(save_path) file_prefix_feed_dict = None try: if not in_graph_mode or self._object_graph_restore_tensor is None: - object_graph_string, = io_ops.restore_v2( - prefix=file_prefix_tensor, - tensor_names=[_OBJECT_GRAPH_PROTO_KEY], - shape_and_slices=[""], - dtypes=[dtypes.string], - name="object_graph_proto_read") + with ops.device("/cpu:0"): + object_graph_string, = io_ops.restore_v2( + prefix=file_prefix_tensor, + tensor_names=[_OBJECT_GRAPH_PROTO_KEY], + shape_and_slices=[""], + dtypes=[dtypes.string], + name="object_graph_proto_read") if in_graph_mode: self._object_graph_restore_tensor = object_graph_string if in_graph_mode: @@ -826,8 +832,9 @@ class Checkpoint(core_checkpointable.Checkpointable): """Create a save counter if it does not yet exist.""" if self._save_counter is None: # Initialized to 0 and incremented before saving. - self._save_counter = add_variable( - self, name="save_counter", initializer=0, dtype=dtypes.int64) + with ops.device("/cpu:0"): + self._save_counter = add_variable( + self, name="save_counter", initializer=0, dtype=dtypes.int64) @property def save_counter(self): @@ -852,7 +859,8 @@ class Checkpoint(core_checkpointable.Checkpointable): # needs to be initialized before assign_add. This is only an issue if # restore() has not been called first. 
session.run(self.save_counter.initializer) - assign_op = self.save_counter.assign_add(1) + with ops.colocate_with(self.save_counter): + assign_op = self.save_counter.assign_add(1) if in_graph_mode: session.run(assign_op) return self._saver.save( diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index c9db2bcafc..9ec89edce8 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -387,7 +387,7 @@ class CheckpointingTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): with ops.Graph().as_default(), self.test_session( - graph=ops.get_default_graph()): + graph=ops.get_default_graph()), test_util.device(use_gpu=True): model = MyModel() optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index f282abb0a5..db17a3fe02 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2882,6 +2882,7 @@ py_library( srcs = ["training/checkpointable.py"], srcs_version = "PY2AND3", deps = [ + ":array_ops", ":dtypes", ":io_ops_gen", ":ops", diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py index 02c3aebda8..92e8ff3308 100644 --- a/tensorflow/python/training/checkpointable.py +++ b/tensorflow/python/training/checkpointable.py @@ -22,6 +22,7 @@ import collections from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_io_ops as io_ops from tensorflow.python.util import nest @@ -181,13 +182,16 @@ class _CheckpointPosition(object): dtype = self._checkpoint.dtype_map[checkpoint_key] base_type = dtype.base_dtype with ops.init_scope(): - 
value, = io_ops.restore_v2( - prefix=self._checkpoint.save_path, - tensor_names=[checkpoint_key], - shape_and_slices=[""], - dtypes=[base_type], - name="%s_checkpoint_read" % (serialized_tensor.name,)) - value_tensors[serialized_tensor.name] = value + with ops.device("/cpu:0"): + # Run the restore itself on the CPU. + value, = io_ops.restore_v2( + prefix=self._checkpoint.save_path, + tensor_names=[checkpoint_key], + shape_and_slices=[""], + dtypes=[base_type], + name="%s_checkpoint_read" % (serialized_tensor.name,)) + # Copy the value to the current device if necessary. + value_tensors[serialized_tensor.name] = array_ops.identity(value) return value_tensors def restore_ops(self): -- GitLab From 4df167ac55346357afd612d15674c7556e21ab00 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 16:25:21 -0800 Subject: [PATCH 285/311] Loop optimizer: Convert StackPush nodes to Identity instead of eliminating them completely. Move loop optimizer to run before dependency optimizer so identity nodes will be pruned. 
PiperOrigin-RevId: 187685669 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/loop_optimizer.cc | 91 ++++++++++--------- .../optimizers/loop_optimizer_test.cc | 74 ++++++++++----- .../grappler/optimizers/meta_optimizer.cc | 22 ++--- tensorflow/core/grappler/utils.cc | 8 +- tensorflow/core/grappler/utils.h | 1 + 6 files changed, 117 insertions(+), 80 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 037438ee75..7ec137373b 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -521,6 +521,7 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ + ":constant_folding", ":graph_optimizer", "//tensorflow/core:framework", "//tensorflow/core:lib", diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index cc226c01db..9e427001d5 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -18,10 +18,12 @@ limitations under the License. 
#include #include +#include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -31,55 +33,60 @@ namespace tensorflow { namespace grappler { namespace { +std::vector GetStackPushNodesToConvert(const SimpleGraphView& graph_view, + int stack_node_idx) { + VLOG(1) << "Stack node: " << graph_view.graph()->node(stack_node_idx).name(); + const std::unordered_set op_types_to_traverse( + {"Stack", "StackV2", "Enter", "RefEnter", "Switch", "RefSwitch", + "Identity", "RefIdentity"}); + std::vector nodes_to_convert; + std::set fanout; + graph_view.DepthFirstSearch(op_types_to_traverse, stack_node_idx, &fanout); + for (int fanout_idx : fanout) { + const NodeDef& fanout_node = graph_view.graph()->node(fanout_idx); + VLOG(1) << "Fanout " << fanout_idx << " : " << fanout_node.name(); + if (IsStackPushOp(fanout_node)) { + nodes_to_convert.push_back(fanout_idx); + } else if (IsStackOp(fanout_node) || IsStackCloseOp(fanout_node) || + op_types_to_traverse.find(fanout_node.op()) != + op_types_to_traverse.end()) { + continue; + } else { + // The node is either a StackPop node or something unexpected behind which + // may hide a StackPop node, so we leave the graph alone. 
+ nodes_to_convert.clear(); + break; + } + } + return nodes_to_convert; +} + Status RemoveStackOps(const GraphDef& graph, GraphDef* optimized_graph) { + *optimized_graph = graph; + NodeMap node_map(optimized_graph); SimpleGraphView graph_view; TF_RETURN_IF_ERROR(graph_view.Initialize(graph)); - const std::unordered_set op_types_to_traverse( - {"Stack", "StackV2", "Enter", "Switch", "RefSwitch", "Identity"}); - std::set nodes_to_delete; for (int node_idx = 0; node_idx < graph.node_size(); ++node_idx) { - const NodeDef& node = graph.node(node_idx); - if (IsStackOp(node)) { - std::set nodes_found; - graph_view.DepthFirstSearch(op_types_to_traverse, node_idx, &nodes_found); - bool found_pop = false; - bool found_unexpected = false; - for (int found_idx : nodes_found) { - const NodeDef& node = graph.node(found_idx); - if (IsStackPushOp(node) || IsStackOp(node) || IsStackCloseOp(node)) { - continue; - } else if (IsStackPopOp(node)) { - found_pop = true; - } else { - // Don't modify the graph if we found an unexpected op. There may be - // a pop hiding behind it. - found_unexpected = true; + if (IsStackOp(graph.node(node_idx))) { + for (int push_node_idx : + GetStackPushNodesToConvert(graph_view, node_idx)) { + // We found push nodes without corresponding pops. Convert them to + // Identity passing the data through and add a control dependency from + // the op supplying the handle. 
+ NodeDef* push_node = optimized_graph->mutable_node(push_node_idx); + VLOG(1) << "Converting " << push_node_idx << " : " + << push_node->DebugString(); + if (push_node->attr().count("swap_memory") != 0) { + push_node->mutable_attr()->erase("swap_memory"); } + push_node->set_op("Identity"); + push_node->mutable_input()->SwapElements(0, 1); + const string ctrl_dep = ConstantFolding::AddControlDependency( + push_node->input(1), optimized_graph, &node_map); + push_node->set_input(1, ctrl_dep); + VLOG(1) << "After converting: " << push_node->DebugString(); } - if (!found_unexpected && !found_pop) { - VLOG(1) << "Found stack node with no pop: " << node.DebugString(); - // Remove all pushes. - for (int found_idx : nodes_found) { - const NodeDef& node = graph.node(found_idx); - if (IsStackPushOp(node)) { - nodes_to_delete.insert(found_idx); - } - } - } - } - } - - *optimized_graph = graph; - if (!nodes_to_delete.empty()) { - int last = optimized_graph->node_size() - 1; - for (auto it = nodes_to_delete.rbegin(); it != nodes_to_delete.rend(); - ++it) { - const int node_to_delete = *it; - optimized_graph->mutable_node()->SwapElements(node_to_delete, last); - --last; } - optimized_graph->mutable_node()->DeleteSubrange(last + 1, - nodes_to_delete.size()); } return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc index bb2ee6b02b..cc9dd22b9e 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc @@ -59,34 +59,46 @@ TEST_F(LoopOptimizerTest, NoOp) { namespace { NodeDef* AddNode(const string& name, const string& op, - const std::vector& inputs, GraphDef* graph) { + const std::vector& inputs, + const std::vector>& attributes, + GraphDef* graph) { NodeDef* node = graph->add_node(); node->set_name(name); node->set_op(op); for (const string& input : inputs) { node->add_input(input); } + for (auto attr : 
attributes) { + (*node->mutable_attr())[attr.first] = attr.second; + } return node; } } // namespace TEST_F(LoopOptimizerTest, RemovePush_NoOp) { GrapplerItem item; + AttrValue frame_name; + frame_name.set_s("foo"); + AttrValue type; + type.set_type(DT_RESOURCE); GraphDef& graph = item.graph; + AddNode("c", "Const", {}, {}, &graph); // Stack with corresponding push/pop. - AddNode("stack1", "StackV2", {}, &graph); - AddNode("push1", "StackPushV2", {"stack1"}, &graph); - AddNode("pop1", "StackPopV2", {"stack1"}, &graph); + AddNode("stack1", "StackV2", {}, {}, &graph); + AddNode("push1", "StackPushV2", {"stack1", "c"}, {}, &graph); + AddNode("pop1", "StackPopV2", {"stack1"}, {}, &graph); // Stack with corresponding push/pop behind Enter. - AddNode("stack2", "StackV2", {}, &graph); - AddNode("push_enter", "Enter", {"stack1"}, &graph); - AddNode("push2", "StackPushV2", {"push_enter"}, &graph); - AddNode("pop_enter", "Enter", {"stack1"}, &graph); - AddNode("pop2", "StackPopV2", {"pop_enter"}, &graph); + AddNode("stack2", "StackV2", {}, {}, &graph); + AddNode("push_enter", "Enter", {"stack2"}, + {{"T", type}, {"frame_name", frame_name}}, &graph); + AddNode("push2", "StackPushV2", {"push_enter", "c"}, {}, &graph); + AddNode("pop_enter", "Enter", {"stack2"}, + {{"T", type}, {"frame_name", frame_name}}, &graph); + AddNode("pop2", "StackPopV2", {"pop_enter"}, {}, &graph); // Stack with unexpected op type in fanout of Stack. 
- AddNode("stack3", "StackV2", {}, &graph); - AddNode("push3", "StackPushV2", {"stack3"}, &graph); - AddNode("stop", "StopGradient", {"stack3"}, &graph); + AddNode("stack3", "StackV2", {}, {}, &graph); + AddNode("push3", "StackPushV2", {"stack3", "c"}, {}, &graph); + AddNode("stop", "StopGradient", {"stack3"}, {}, &graph); LoopOptimizer optimizer; GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); @@ -97,23 +109,39 @@ TEST_F(LoopOptimizerTest, RemovePush_NoOp) { TEST_F(LoopOptimizerTest, RemovePushWithoutMatchingPop) { GrapplerItem item; GraphDef& graph = item.graph; - AddNode("stack1", "StackV2", {}, &graph); - AddNode("push1", "StackPushV2", {"stack1"}, &graph); - AddNode("stack2", "StackV2", {}, &graph); - AddNode("push_enter", "Enter", {"stack2"}, &graph); - AddNode("push2", "StackPushV2", {"push_enter"}, &graph); + AttrValue frame_name; + frame_name.set_s("foo"); + AttrValue type; + type.set_type(DT_RESOURCE); + AddNode("c", "Const", {}, {}, &graph); + AddNode("stack1", "StackV2", {}, {}, &graph); + AddNode("push1", "StackPushV2", {"stack1", "c"}, {}, &graph); + AddNode("stack2", "StackV2", {}, {}, &graph); + AddNode("push_enter", "Enter", {"stack2"}, + {{"T", type}, {"frame_name", frame_name}}, &graph); + AddNode("push2", "StackPushV2", {"push_enter", "c"}, {}, &graph); LoopOptimizer optimizer; GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(3, output.node_size()); - int found = 0; + EXPECT_EQ(6, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { - if (output.node(i).name() == "stack1") ++found; - if (output.node(i).name() == "push_enter") ++found; - if (output.node(i).name() == "stack2") ++found; + const NodeDef& node = output.node(i); + if (node.name() == "push1") { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("c", node.input(0)); + EXPECT_EQ("^stack1", node.input(1)); + } else if (node.name() == "push2") { + 
EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("c", node.input(0)); + EXPECT_EQ("^push_enter", node.input(1)); + } else { + const NodeDef& orig_node = item.graph.node(i); + EXPECT_EQ(orig_node.ShortDebugString(), node.ShortDebugString()); + } } - EXPECT_EQ(3, found); } } // namespace diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index fff1e354f4..6fa8c03548 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -77,13 +77,13 @@ std::unique_ptr MetaOptimizer::NewOptimizer( graph_optimizer.reset( new AutoParallel(cfg_.auto_parallel().num_replicas())); } + if (optimizer == "loop") { + graph_optimizer.reset(new LoopOptimizer(cfg_.loop_optimization())); + } if (optimizer == "dependency") { graph_optimizer.reset( new DependencyOptimizer(cfg_.dependency_optimization())); } - if (optimizer == "loop") { - graph_optimizer.reset(new LoopOptimizer(cfg_.loop_optimization())); - } return graph_optimizer; } @@ -106,14 +106,14 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new ArithmeticOptimizer(cfg_.arithmetic_optimization()))); } - if (cfg_.dependency_optimization() != RewriterConfig::OFF) { - optimizers.push_back(std::unique_ptr( - new DependencyOptimizer(cfg_.dependency_optimization()))); - } if (cfg_.loop_optimization() == RewriterConfig::ON) { optimizers.push_back(std::unique_ptr( new LoopOptimizer(cfg_.loop_optimization()))); } + if (cfg_.dependency_optimization() != RewriterConfig::OFF) { + optimizers.push_back(std::unique_ptr( + new DependencyOptimizer(cfg_.dependency_optimization()))); + } if (cfg_.layout_optimizer() != RewriterConfig::OFF) { optimizers.push_back( std::unique_ptr(new LayoutOptimizer())); @@ -136,8 +136,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, } } else { const 
std::set available_optimizers = { - "pruning", "function", "constfold", "layout", "memory", - "autoparallel", "arithmetic", "dependency", "loop"}; + "pruning", "function", "constfold", "layout", "memory", + "autoparallel", "arithmetic", "loop", "dependency"}; std::vector custom_optimizer_names; for (const auto& optimizer_name : cfg_.optimizers()) { if (available_optimizers.find(optimizer_name) != @@ -233,9 +233,9 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { cfg.layout_optimizer() != RewriterConfig::OFF || cfg.function_optimization() == RewriterConfig::ON || cfg.constant_folding() != RewriterConfig::OFF || - cfg.dependency_optimization() != RewriterConfig::OFF || - cfg.loop_optimization() == RewriterConfig::ON || cfg.arithmetic_optimization() != RewriterConfig::OFF || + cfg.loop_optimization() == RewriterConfig::ON || + cfg.dependency_optimization() != RewriterConfig::OFF || cfg.auto_parallel().enable() || cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT || !cfg.optimizers().empty(); diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index a611a93086..eb1f882ff1 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -398,12 +398,12 @@ Status SimpleGraphView::Initialize(const GraphDef& graph, bool dedup_inputs, void SimpleGraphView::DepthFirstSearch( const std::unordered_set& op_types_to_traverse, int node_idx, std::set* nodes_found) const { - const NodeDef& node = graph_->node(node_idx); - if (op_types_to_traverse.find(node.op()) == op_types_to_traverse.end()) { - nodes_found->insert(node_idx); + if (nodes_found->find(node_idx) != nodes_found->end()) { return; } - if (nodes_found->find(node_idx) != nodes_found->end()) { + nodes_found->insert(node_idx); + const string& op_type = graph_->node(node_idx).op(); + if (op_types_to_traverse.find(op_type) == op_types_to_traverse.end()) { return; } for (auto output_idx : this->outputs(node_idx)) { diff --git 
a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index 1b91a57154..fbd38c1531 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -178,6 +178,7 @@ class SimpleGraphView { Status Initialize(const GraphDef& graph, bool dedup_inputs, bool dedup_outputs); + const GraphDef* graph() const { return graph_; } inline int num_nodes() const { return index_to_name_.size(); } inline const int index(const string& node_name) const { const auto& it = name_to_index_.find(node_name); -- GitLab From 1bbb03eb59fcb3a4b52c45d0063dcc9875206910 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 2 Mar 2018 16:58:11 -0800 Subject: [PATCH 286/311] Don't throw errors if non-Checkpointable objects are passed to MultiRNNCell PiperOrigin-RevId: 187689371 --- tensorflow/python/ops/rnn_cell_impl.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index bd7c731210..3ae1d1184d 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -46,6 +46,7 @@ from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as tf_variables from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @@ -1190,7 +1191,9 @@ class MultiRNNCell(RNNCell): for cell_number, cell in enumerate(self._cells): # Add Checkpointable dependencies on these cells so their variables get # saved with this object when using object-based saving. - self._track_checkpointable(cell, name="cell-%d" % (cell_number,)) + if isinstance(cell, checkpointable.CheckpointableBase): + # TODO(allenl): Track down non-Checkpointable callers. 
+ self._track_checkpointable(cell, name="cell-%d" % (cell_number,)) self._state_is_tuple = state_is_tuple if not state_is_tuple: if any(nest.is_sequence(c.state_size) for c in self._cells): -- GitLab From 0c92f574d18cd01134bb9f7a5a679866a0f92f7e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 2 Mar 2018 17:18:00 -0800 Subject: [PATCH 287/311] Properly handle the case of functions with no inputs PiperOrigin-RevId: 187691555 --- .../grappler/optimizers/function_optimizer.cc | 12 +++++- .../optimizers/function_optimizer_test.cc | 34 +++++++++++++++++ .../core/grappler/utils/functions_test.cc | 37 +++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 167e5a153a..4b830bcc6e 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -126,9 +126,17 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { std::unordered_map functions; for (const FunctionDef& func : item.graph.library().function()) { - if (func.attr().count("_noinline") == 0) { - functions[func.signature().name()] = &func; + // Don't inline functions marked as noinline + if (func.attr().count("_noinline") != 0) { + continue; } + // Can't create IdentityN nodes with no input or output: skip these + // functions for now. + if (func.signature().input_arg_size() == 0 || + func.signature().output_arg_size() == 0) { + continue; + } + functions[func.signature().name()] = &func; } // Nothing to do. 
diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index 5072abaac7..8db9b7f77a 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -339,6 +339,40 @@ TEST_F(FunctionOptimizerTest, FunctionWithInputForwarding) { test::ExpectTensorEqual(tensors_expected[2], tensors[2]); } +TEST_F(FunctionOptimizerTest, FunctionWithoutInput) { + const Tensor kTwo = test::AsScalar(2); + FunctionDef func = FunctionDefHelper::Define( + // Name + "GenerateTwo", + // Args + {}, + // Return value + {"o: T"}, + // Attr def + {"T: {float, double}"}, + // Nodes + {{{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"o"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}}); + + GrapplerItem item; + constexpr char device[] = "/device:CPU:0"; + item.graph = test::function::GDef( + {test::function::NDef("y", "GenerateTwo", {}, {}, device), + test::function::NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, device)}, + // FunctionLib + { + func, + }); + + FunctionOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + // For now we won't inline the function. 
+ EXPECT_EQ(item.graph.DebugString(), output.DebugString()); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc index 25ec50d478..6a7d766b1c 100644 --- a/tensorflow/core/grappler/utils/functions_test.cc +++ b/tensorflow/core/grappler/utils/functions_test.cc @@ -308,6 +308,43 @@ TEST_F(FunctionsTest, FromFunctionDefWithInputForwarding) { } } +TEST_F(FunctionsTest, FromFunctionDefWithoutInput) { + const Tensor kTwo = test::AsScalar(2); + FunctionDef func = FunctionDefHelper::Define( + // Name + "GenerateTwo", + // Args + {}, + // Return value + {"o: T"}, + // Attr def + {"T: {float, double}"}, + // Nodes + {{{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"o"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}}); + + std::unordered_map func_attr; + func_attr["T"].set_type(DT_FLOAT); + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + + EXPECT_EQ(0, item->feed.size()); + EXPECT_EQ(1, item->fetch.size()); + EXPECT_EQ("o:0", item->fetch[0]); + + EXPECT_EQ(2, item->graph.node_size()); + const NodeDef &two = item->graph.node(0); + EXPECT_EQ("two", two.name()); + EXPECT_EQ(0, two.input_size()); + const NodeDef &cast = item->graph.node(1); + EXPECT_EQ("o", cast.name()); + EXPECT_EQ(1, cast.input_size()); + EXPECT_EQ("two:0", cast.input(0)); + + std::cout << item->graph.DebugString() << std::endl; +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 9886d918a2b160b95cf410516d61ec5d3174cc83 Mon Sep 17 00:00:00 2001 From: Yin Li Date: Tue, 14 Nov 2017 20:26:58 +0800 Subject: [PATCH 288/311] Fold batch norm with batch to space --- .../graph_transforms/fold_old_batch_norms.cc | 67 +++++++++++++ .../fold_old_batch_norms_test.cc | 95 +++++++++++++++++++ 2 files changed, 162 insertions(+) diff --git 
a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index d89afe85c7..d86f65325b 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -182,6 +182,36 @@ Status FuseBatchNormWithConv(const NodeMatch& match, return Status::OK(); } +Status FuseBatchNormWithBatchToSpace(const NodeMatch& match, + std::vector* new_nodes) { + // Calculate the scale and offset values to apply. + std::vector scale_values; + std::vector offset_values; + TF_RETURN_IF_ERROR( + GetScaleAndOffsetValues(match, &scale_values, &offset_values)); + + // Fuse conv weights, and set the final output node name as batch_norm_node. + const NodeDef& batch_norm_node = match.node; + const NodeMatch& batch_to_space_node_match = match.inputs[0]; + const NodeMatch& conv_node_match = batch_to_space_node_match.inputs[0]; + const NodeDef& batch_to_space_node = batch_to_space_node_match.node; + const NodeDef& conv_node = conv_node_match.node; + + string biasadd_name = conv_node.name() + "/biasadd"; + TF_RETURN_IF_ERROR( + FuseScaleOffsetToConvWeights(scale_values, offset_values, conv_node_match, + biasadd_name , new_nodes)); + + NodeDef new_batch_to_space_node = batch_to_space_node; + // reuse batch_norm node name + new_batch_to_space_node.set_name(batch_norm_node.name()); + new_batch_to_space_node.set_input(0, biasadd_name); + new_nodes->push_back(batch_to_space_node_match.inputs[1].node); + new_nodes->push_back(batch_to_space_node_match.inputs[2].node); + new_nodes->push_back(new_batch_to_space_node); + return Status::OK(); +} + Status FuseBatchNormWithConvConcat(const NodeMatch& match, std::vector* new_nodes) { // Calculate the scale and offset values to apply. 
@@ -284,6 +314,43 @@ Status FoldOldBatchNorms(const GraphDef& input_graph_def, current_graph_def = replaced_graph_def; } while (did_graph_change); + do { + did_graph_change = false; + GraphDef replaced_graph_def; + TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes( + current_graph_def, // clang-format off + {"BatchNormWithGlobalNormalization|FusedBatchNorm", // batch_norm_node + { + {"BatchToSpaceND", // batch_to_space_node + { + {"Conv2D", // conv_node + { + {"*"}, // input_node + {"Const"}, // weights_node + } + }, + {"Const"}, // block_shape + {"Const"}, // crops + } + }, + {"Const"}, // mean_node + {"Const"}, // variance_node + {"Const"}, // beta_node + {"Const"}, // gamma_node + } + }, // clang-format on + [&did_graph_change](const NodeMatch& match, + const std::set& input_nodes, + const std::set& output_nodes, + std::vector* new_nodes) { + TF_RETURN_IF_ERROR(FuseBatchNormWithBatchToSpace(match, new_nodes)); + did_graph_change = true; + return Status::OK(); + }, + {}, &replaced_graph_def)); + current_graph_def = replaced_graph_def; + } while (did_graph_change); + do { did_graph_change = false; GraphDef replaced_graph_def; diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index b30ba9ac8b..272410c693 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/image_ops.h" #include "tensorflow/cc/ops/nn_ops.h" +#include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/sendrecv_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -298,6 +299,96 @@ class FoldOldBatchNormsTest : public ::testing::Test { } }; +void TestFoldFusedBatchNormsWithBatchToSpace() { + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + Tensor input_data(DT_FLOAT, TensorShape({2, 1, 3, 2})); + test::FillValues( + &input_data, {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f, -1.0f, -4.0f, -2.0f, + -5.0f, -3.0f, -6.0f}); + Output input_op = + Const(root.WithOpName("input_op"), Input::Initializer(input_data)); + + Tensor weights_data(DT_FLOAT, TensorShape({1, 2, 2, 2})); + test::FillValues(&weights_data, + {1.0f, 2.0f, 3.0f, 4.0f, 0.1f, 0.2f, 0.3f, 0.4f}); + Output weights_op = + Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); + + Output conv_op = Conv2D(root.WithOpName("conv_op"), input_op, weights_op, + {1, 1, 1, 1}, "VALID"); + + Tensor block_shape_data(DT_INT32, TensorShape({2})); + test::FillValues(&block_shape_data, {1, 2}); + Output block_shape_op = + Const(root.WithOpName("block_shape_op"), Input::Initializer(block_shape_data)); + + Tensor crops_data(DT_INT32, TensorShape({2, 2})); + test::FillValues(&crops_data, {0, 0, 0, 1}); + Output crops_op = + Const(root.WithOpName("crops_op"), Input::Initializer(crops_data)); + + Output batch_to_space_op = BatchToSpaceND(root.WithOpName("batch_to_space_op"), + conv_op, block_shape_op, crops_data); + + Tensor mean_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&mean_data, {10.0f, 20.0f}); + Output mean_op = + Const(root.WithOpName("mean_op"), Input::Initializer(mean_data)); + + Tensor variance_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&variance_data, {0.25f, 0.5f}); + Output 
variance_op = Const(root.WithOpName("variance_op"), + Input::Initializer(variance_data)); + + Tensor beta_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&beta_data, {0.1f, 0.6f}); + Output beta_op = + Const(root.WithOpName("beta_op"), Input::Initializer(beta_data)); + + Tensor gamma_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&gamma_data, {1.0f, 2.0f}); + Output gamma_op = + Const(root.WithOpName("gamma_op"), Input::Initializer(gamma_data)); + + GraphDef original_graph_def; + TF_ASSERT_OK(root.ToGraphDef(&original_graph_def)); + + NodeDef batch_norm_node; + batch_norm_node.set_op("FusedBatchNorm"); + batch_norm_node.set_name("output"); + AddNodeInput("batch_to_space_op", &batch_norm_node); + AddNodeInput("gamma_op", &batch_norm_node); + AddNodeInput("beta_op", &batch_norm_node); + AddNodeInput("mean_op", &batch_norm_node); + AddNodeInput("variance_op", &batch_norm_node); + SetNodeAttr("T", DT_FLOAT, &batch_norm_node); + SetNodeAttr("epsilon", 0.00001f, &batch_norm_node); + SetNodeAttr("is_training", false, &batch_norm_node); + *(original_graph_def.mutable_node()->Add()) = batch_norm_node; + + std::unique_ptr original_session(NewSession(SessionOptions())); + TF_ASSERT_OK(original_session->Create(original_graph_def)); + std::vector original_outputs; + TF_ASSERT_OK(original_session->Run({}, {"output"}, {}, &original_outputs)); + + GraphDef fused_graph_def; + TF_ASSERT_OK(FoldOldBatchNorms(original_graph_def, {{}, {"output"}}, + &fused_graph_def)); + + std::unique_ptr fused_session(NewSession(SessionOptions())); + TF_ASSERT_OK(fused_session->Create(fused_graph_def)); + std::vector fused_outputs; + TF_ASSERT_OK(fused_session->Run({}, {"output"}, {}, &fused_outputs)); + + test::ExpectTensorNear(original_outputs[0], fused_outputs[0], 1e-5); + + for (const NodeDef& node : fused_graph_def.node()) { + EXPECT_NE("FusedBatchNormWithBatchToSpace", node.op()); + } +} + TEST_F(FoldOldBatchNormsTest, TestFoldOldBatchNorms) { TestFoldOldBatchNorms(); } @@ -315,5 
+406,9 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithConcat) { TestFoldFusedBatchNormsWithConcat(/*split=*/false); } +TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithBatchToSpace) { + TestFoldFusedBatchNormsWithBatchToSpace(); +} + } // namespace graph_transforms } // namespace tensorflow -- GitLab From ab635a9b9691e36e42de000468c13e4f66272116 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 18:33:21 -0800 Subject: [PATCH 289/311] Merged commit includes the following changes: 187697531 by andrewharp: Tweak whitespace for fft2d dep. -- 187696129 by A. Unique TensorFlower: Generalize support for logical expressions, comparison operators and multiple comparisons. -- 187692494 by vinuraja: * Adds a boolean attribute to ConfigureDistributedTPUOp for internal use. * Adds GraphRunner ctor which takes in the device to run the graph on. -- 187692129 by andrewharp: Audio utility classes for supporting MFCC and AudioSpectrogram operators -- PiperOrigin-RevId: 187697531 --- .../contrib/lite/kernels/internal/BUILD | 21 ++ .../contrib/lite/kernels/internal/mfcc.cc | 65 +++++ .../contrib/lite/kernels/internal/mfcc.h | 78 ++++++ .../contrib/lite/kernels/internal/mfcc_dct.cc | 78 ++++++ .../contrib/lite/kernels/internal/mfcc_dct.h | 43 +++ .../kernels/internal/mfcc_mel_filterbank.cc | 204 +++++++++++++++ .../kernels/internal/mfcc_mel_filterbank.h | 63 +++++ .../lite/kernels/internal/spectrogram.cc | 244 ++++++++++++++++++ .../lite/kernels/internal/spectrogram.h | 110 ++++++++ .../py2tf/converters/logical_expressions.py | 121 ++++++--- .../converters/logical_expressions_test.py | 4 +- tensorflow/contrib/py2tf/impl/conversion.py | 2 +- .../contrib/tpu/ops/tpu_configuration_ops.cc | 2 + .../core/common_runtime/graph_runner.cc | 25 +- tensorflow/core/common_runtime/graph_runner.h | 9 +- 15 files changed, 1018 insertions(+), 51 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/internal/mfcc.cc create mode 100644 
tensorflow/contrib/lite/kernels/internal/mfcc.h create mode 100644 tensorflow/contrib/lite/kernels/internal/mfcc_dct.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/mfcc_dct.h create mode 100644 tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h create mode 100644 tensorflow/contrib/lite/kernels/internal/spectrogram.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/spectrogram.h diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index 6ccad3b1ce..d5dd2cbf14 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -309,6 +309,27 @@ cc_library( ], ) +# Audio support classes imported directly from TensorFlow. +cc_library( + name = "audio_utils", + srcs = [ + "mfcc.cc", + "mfcc_dct.cc", + "mfcc_mel_filterbank.cc", + "spectrogram.cc", + ], + hdrs = [ + "mfcc.h", + "mfcc_dct.h", + "mfcc_mel_filterbank.h", + "spectrogram.h", + ], + deps = [ + "//third_party/fft2d:fft2d_headers", + "@fft2d", + ], +) + cc_library( name = "tensor_utils", srcs = [ diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc.cc b/tensorflow/contrib/lite/kernels/internal/mfcc.cc new file mode 100644 index 0000000000..eafe0c7afe --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc.cc @@ -0,0 +1,65 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/contrib/lite/kernels/internal/mfcc.h" + +namespace tflite { +namespace internal { + +const double kDefaultUpperFrequencyLimit = 4000; +const double kDefaultLowerFrequencyLimit = 20; +const double kFilterbankFloor = 1e-12; +const int kDefaultFilterbankChannelCount = 40; +const int kDefaultDCTCoefficientCount = 13; + +Mfcc::Mfcc() + : initialized_(false), + lower_frequency_limit_(kDefaultLowerFrequencyLimit), + upper_frequency_limit_(kDefaultUpperFrequencyLimit), + filterbank_channel_count_(kDefaultFilterbankChannelCount), + dct_coefficient_count_(kDefaultDCTCoefficientCount) {} + +bool Mfcc::Initialize(int input_length, double input_sample_rate) { + bool initialized = mel_filterbank_.Initialize( + input_length, input_sample_rate, filterbank_channel_count_, + lower_frequency_limit_, upper_frequency_limit_); + initialized &= + dct_.Initialize(filterbank_channel_count_, dct_coefficient_count_); + initialized_ = initialized; + return initialized; +} + +void Mfcc::Compute(const std::vector& spectrogram_frame, + std::vector* output) const { + if (!initialized_) { + // LOG(ERROR) << "Mfcc not initialized."; + return; + } + std::vector working; + mel_filterbank_.Compute(spectrogram_frame, &working); + for (int i = 0; i < working.size(); ++i) { + double val = working[i]; + if (val < kFilterbankFloor) { + val = kFilterbankFloor; + } + working[i] = log(val); + } + dct_.Compute(working, output); +} + +} // namespace internal +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc.h b/tensorflow/contrib/lite/kernels/internal/mfcc.h new file mode 100644 index 0000000000..d8500ecdcf --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc.h @@ -0,0 +1,78 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Basic class for computing MFCCs from spectrogram slices. + +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_H_ + +#include + +#include "tensorflow/contrib/lite/kernels/internal/mfcc_dct.h" +#include "tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h" + +namespace tflite { +namespace internal { + +class Mfcc { + public: + Mfcc(); + bool Initialize(int input_length, double input_sample_rate); + + // Input is a single squared-magnitude spectrogram frame. The input spectrum + // is converted to linear magnitude and weighted into bands using a + // triangular mel filterbank, and a discrete cosine transform (DCT) of the + // values is taken. Output is populated with the lowest dct_coefficient_count + // of these values. 
+ void Compute(const std::vector& spectrogram_frame, + std::vector* output) const; + + void set_upper_frequency_limit(double upper_frequency_limit) { + // CHECK(!initialized_) << "Set frequency limits before calling + // Initialize."; + upper_frequency_limit_ = upper_frequency_limit; + } + + void set_lower_frequency_limit(double lower_frequency_limit) { + // CHECK(!initialized_) << "Set frequency limits before calling + // Initialize."; + lower_frequency_limit_ = lower_frequency_limit; + } + + void set_filterbank_channel_count(int filterbank_channel_count) { + /// CHECK(!initialized_) << "Set channel count before calling Initialize."; + filterbank_channel_count_ = filterbank_channel_count; + } + + void set_dct_coefficient_count(int dct_coefficient_count) { + // CHECK(!initialized_) << "Set coefficient count before calling + // Initialize."; + dct_coefficient_count_ = dct_coefficient_count; + } + + private: + MfccMelFilterbank mel_filterbank_; + MfccDct dct_; + bool initialized_; + double lower_frequency_limit_; + double upper_frequency_limit_; + int filterbank_channel_count_; + int dct_coefficient_count_; +}; + +} // namespace internal +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc_dct.cc b/tensorflow/contrib/lite/kernels/internal/mfcc_dct.cc new file mode 100644 index 0000000000..b0b7d181bd --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc_dct.cc @@ -0,0 +1,78 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/kernels/internal/mfcc_dct.h" + +#include + +namespace tflite { +namespace internal { + +MfccDct::MfccDct() : initialized_(false) {} + +bool MfccDct::Initialize(int input_length, int coefficient_count) { + coefficient_count_ = coefficient_count; + input_length_ = input_length; + + if (coefficient_count_ < 1) { + return false; + } + + if (input_length < 1) { + return false; + } + + if (coefficient_count_ > input_length_) { + return false; + } + + cosines_.resize(coefficient_count_); + double fnorm = sqrt(2.0 / input_length_); + // Some platforms don't have M_PI, so define a local constant here. + const double pi = atan(1) * 4; + double arg = pi / input_length_; + for (int i = 0; i < coefficient_count_; ++i) { + cosines_[i].resize(input_length_); + for (int j = 0; j < input_length_; ++j) { + cosines_[i][j] = fnorm * cos(i * arg * (j + 0.5)); + } + } + initialized_ = true; + return true; +} + +void MfccDct::Compute(const std::vector &input, + std::vector *output) const { + if (!initialized_) { + return; + } + + output->resize(coefficient_count_); + int length = input.size(); + if (length > input_length_) { + length = input_length_; + } + + for (int i = 0; i < coefficient_count_; ++i) { + double sum = 0.0; + for (int j = 0; j < length; ++j) { + sum += cosines_[i][j] * input[j]; + } + (*output)[i] = sum; + } +} + +} // namespace internal +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc_dct.h b/tensorflow/contrib/lite/kernels/internal/mfcc_dct.h new file mode 100644 index 0000000000..a53f5cbd9b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc_dct.h @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Basic minimal DCT class for MFCC speech processing. + +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_DCT_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_DCT_H_ + +#include + +namespace tflite { +namespace internal { + +class MfccDct { + public: + MfccDct(); + bool Initialize(int input_length, int coefficient_count); + void Compute(const std::vector& input, + std::vector* output) const; + + private: + bool initialized_; + int coefficient_count_; + int input_length_; + std::vector > cosines_; +}; + +} // namespace internal +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_DCT_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.cc b/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.cc new file mode 100644 index 0000000000..c3deb33d91 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.cc @@ -0,0 +1,204 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This code resamples the FFT bins, and smooths then with triangle-shaped +// weights to create a mel-frequency filter bank. For filter i centered at f_i, +// there is a triangular weighting of the FFT bins that extends from +// filter f_i-1 (with a value of zero at the left edge of the triangle) to f_i +// (where the filter value is 1) to f_i+1 (where the filter values returns to +// zero). + +// Note: this code fails if you ask for too many channels. The algorithm used +// here assumes that each FFT bin contributes to at most two channels: the +// right side of a triangle for channel i, and the left side of the triangle +// for channel i+1. If you ask for so many channels that some of the +// resulting mel triangle filters are smaller than a single FFT bin, these +// channels may end up with no contributing FFT bins. The resulting mel +// spectrum output will have some channels that are always zero. 
+ +#include "tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h" + +#include + +namespace tflite { +namespace internal { + +MfccMelFilterbank::MfccMelFilterbank() : initialized_(false) {} + +bool MfccMelFilterbank::Initialize(int input_length, double input_sample_rate, + int output_channel_count, + double lower_frequency_limit, + double upper_frequency_limit) { + num_channels_ = output_channel_count; + sample_rate_ = input_sample_rate; + input_length_ = input_length; + + if (num_channels_ < 1) { + // LOG(ERROR) << "Number of filterbank channels must be positive."; + return false; + } + + if (sample_rate_ <= 0) { + // LOG(ERROR) << "Sample rate must be positive."; + return false; + } + + if (input_length < 2) { + // LOG(ERROR) << "Input length must greater than 1."; + return false; + } + + if (lower_frequency_limit < 0) { + // LOG(ERROR) << "Lower frequency limit must be nonnegative."; + return false; + } + + if (upper_frequency_limit <= lower_frequency_limit) { + /// LOG(ERROR) << "Upper frequency limit must be greater than " + // << "lower frequency limit."; + return false; + } + + // An extra center frequency is computed at the top to get the upper + // limit on the high side of the final triangular filter. + center_frequencies_.resize(num_channels_ + 1); + const double mel_low = FreqToMel(lower_frequency_limit); + const double mel_hi = FreqToMel(upper_frequency_limit); + const double mel_span = mel_hi - mel_low; + const double mel_spacing = mel_span / static_cast(num_channels_ + 1); + for (int i = 0; i < num_channels_ + 1; ++i) { + center_frequencies_[i] = mel_low + (mel_spacing * (i + 1)); + } + + // Always exclude DC; emulate HTK. + const double hz_per_sbin = + 0.5 * sample_rate_ / static_cast(input_length_ - 1); + start_index_ = static_cast(1.5 + (lower_frequency_limit / hz_per_sbin)); + end_index_ = static_cast(upper_frequency_limit / hz_per_sbin); + + // Maps the input spectrum bin indices to filter bank channels/indices. 
For + // each FFT bin, band_mapper tells us which channel this bin contributes to + // on the right side of the triangle. Thus this bin also contributes to the + // left side of the next channel's triangle response. + band_mapper_.resize(input_length_); + int channel = 0; + for (int i = 0; i < input_length_; ++i) { + double melf = FreqToMel(i * hz_per_sbin); + if ((i < start_index_) || (i > end_index_)) { + band_mapper_[i] = -2; // Indicate an unused Fourier coefficient. + } else { + while ((center_frequencies_[channel] < melf) && + (channel < num_channels_)) { + ++channel; + } + band_mapper_[i] = channel - 1; // Can be == -1 + } + } + + // Create the weighting functions to taper the band edges. The contribution + // of any one FFT bin is based on its distance along the continuum between two + // mel-channel center frequencies. This bin contributes weights_[i] to the + // current channel and 1-weights_[i] to the next channel. + weights_.resize(input_length_); + for (int i = 0; i < input_length_; ++i) { + channel = band_mapper_[i]; + if ((i < start_index_) || (i > end_index_)) { + weights_[i] = 0.0; + } else { + if (channel >= 0) { + weights_[i] = + (center_frequencies_[channel + 1] - FreqToMel(i * hz_per_sbin)) / + (center_frequencies_[channel + 1] - center_frequencies_[channel]); + } else { + weights_[i] = (center_frequencies_[0] - FreqToMel(i * hz_per_sbin)) / + (center_frequencies_[0] - mel_low); + } + } + } + // Check the sum of FFT bin weights for every mel band to identify + // situations where the mel bands are so narrow that they don't get + // significant weight on enough (or any) FFT bins -- i.e., too many + // mel bands have been requested for the given FFT size. 
+ std::vector bad_channels; + for (int c = 0; c < num_channels_; ++c) { + float band_weights_sum = 0.0; + for (int i = 0; i < input_length_; ++i) { + if (band_mapper_[i] == c - 1) { + band_weights_sum += (1.0 - weights_[i]); + } else if (band_mapper_[i] == c) { + band_weights_sum += weights_[i]; + } + } + // The lowest mel channels have the fewest FFT bins and the lowest + // weights sum. But given that the target gain at the center frequency + // is 1.0, if the total sum of weights is 0.5, we're in bad shape. + if (band_weights_sum < 0.5) { + bad_channels.push_back(c); + } + } + if (!bad_channels.empty()) { + /* + LOG(ERROR) << "Missing " << bad_channels.size() << " bands " + << " starting at " << bad_channels[0] + << " in mel-frequency design. " + << "Perhaps too many channels or " + << "not enough frequency resolution in spectrum. (" + << "input_length: " << input_length + << " input_sample_rate: " << input_sample_rate + << " output_channel_count: " << output_channel_count + << " lower_frequency_limit: " << lower_frequency_limit + << " upper_frequency_limit: " << upper_frequency_limit; + */ + } + initialized_ = true; + return true; +} + +// Compute the mel spectrum from the squared-magnitude FFT input by taking the +// square root, then summing FFT magnitudes under triangular integration windows +// whose widths increase with frequency. +void MfccMelFilterbank::Compute(const std::vector &input, + std::vector *output) const { + if (!initialized_) { + // LOG(ERROR) << "Mel Filterbank not initialized."; + return; + } + + if (input.size() <= end_index_) { + // LOG(ERROR) << "Input too short to compute filterbank"; + return; + } + + // Ensure output is right length and reset all values. 
+ output->assign(num_channels_, 0.0); + + for (int i = start_index_; i <= end_index_; i++) { // For each FFT bin + double spec_val = sqrt(input[i]); + double weighted = spec_val * weights_[i]; + int channel = band_mapper_[i]; + if (channel >= 0) + (*output)[channel] += weighted; // Right side of triangle, downward slope + channel++; + if (channel < num_channels_) + (*output)[channel] += spec_val - weighted; // Left side of triangle + } +} + +double MfccMelFilterbank::FreqToMel(double freq) const { + return 1127.0 * log(1.0 + (freq / 700.0)); +} + +} // namespace internal +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h b/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h new file mode 100644 index 0000000000..c1db28243e --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h @@ -0,0 +1,63 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Basic class for applying a mel-scale mapping to a power spectrum. + +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_MEL_FILTERBANK_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_MEL_FILTERBANK_H_ + +#include + +namespace tflite { +namespace internal { + +class MfccMelFilterbank { + public: + MfccMelFilterbank(); + bool Initialize(int input_length, // Number of unique FFT bins fftsize/2+1. 
+ double input_sample_rate, int output_channel_count, + double lower_frequency_limit, double upper_frequency_limit); + + // Takes a squared-magnitude spectrogram slice as input, computes a + // triangular-mel-weighted linear-magnitude filterbank, and places the result + // in output. + void Compute(const std::vector& input, + std::vector* output) const; + + private: + double FreqToMel(double freq) const; + bool initialized_; + int num_channels_; + double sample_rate_; + int input_length_; + std::vector center_frequencies_; // In mel, for each mel channel. + + // Each FFT bin b contributes to two triangular mel channels, with + // proportion weights_[b] going into mel channel band_mapper_[b], and + // proportion (1 - weights_[b]) going into channel band_mapper_[b] + 1. + // Thus, weights_ contains the weighting applied to each FFT bin for the + // upper-half of the triangular band. + std::vector weights_; // Right-side weight for this fft bin. + + // FFT bin i contributes to the upper side of mel channel band_mapper_[i] + std::vector band_mapper_; + int start_index_; // Lowest FFT bin used to calculate mel spectrum. + int end_index_; // Highest FFT bin used to calculate mel spectrum. +}; + +} // namespace internal +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_MEL_FILTERBANK_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/spectrogram.cc b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc new file mode 100644 index 0000000000..66ca694dc4 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc @@ -0,0 +1,244 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/kernels/internal/spectrogram.h" + +#include + +#include "third_party/fft2d/fft.h" + +namespace tflite { +namespace internal { + +using std::complex; + +namespace { +// Returns the default Hann window function for the spectrogram. +void GetPeriodicHann(int window_length, std::vector* window) { + // Some platforms don't have M_PI, so define a local constant here. + const double pi = std::atan(1) * 4; + window->resize(window_length); + for (int i = 0; i < window_length; ++i) { + (*window)[i] = 0.5 - 0.5 * cos((2 * pi * i) / window_length); + } +} +} // namespace + +bool Spectrogram::Initialize(int window_length, int step_length) { + std::vector window; + GetPeriodicHann(window_length, &window); + return Initialize(window, step_length); +} + +inline int Log2Floor(uint n) { + if (n == 0) return -1; + int log = 0; + uint value = n; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + uint x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } + } + assert(value == 1); + return log; +} + +inline int Log2Ceiling(uint n) { + int floor = Log2Floor(n); + if (n == (n & ~(n - 1))) // zero or a power of two + return floor; + else + return floor + 1; +} + +inline uint NextPowerOfTwo(uint value) { + int exponent = Log2Ceiling(value); + // DCHECK_LT(exponent, std::numeric_limits::digits); + return 1 << exponent; +} + +bool Spectrogram::Initialize(const std::vector& window, + int step_length) { + window_length_ = window.size(); + window_ = window; // 
Copy window. + if (window_length_ < 2) { + // LOG(ERROR) << "Window length too short."; + initialized_ = false; + return false; + } + + step_length_ = step_length; + if (step_length_ < 1) { + // LOG(ERROR) << "Step length must be positive."; + initialized_ = false; + return false; + } + + fft_length_ = NextPowerOfTwo(window_length_); + // CHECK(fft_length_ >= window_length_); + output_frequency_channels_ = 1 + fft_length_ / 2; + + // Allocate 2 more than what rdft needs, so we can rationalize the layout. + fft_input_output_.assign(fft_length_ + 2, 0.0); + + int half_fft_length = fft_length_ / 2; + fft_double_working_area_.assign(half_fft_length, 0.0); + fft_integer_working_area_.assign(2 + static_cast(sqrt(half_fft_length)), + 0); + // Set flag element to ensure that the working areas are initialized + // on the first call to cdft. It's redundant given the assign above, + // but keep it as a reminder. + fft_integer_working_area_[0] = 0; + input_queue_.clear(); + samples_to_next_step_ = window_length_; + initialized_ = true; + return true; +} + +template +bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector>>* output) { + if (!initialized_) { + // LOG(ERROR) << "ComputeComplexSpectrogram() called before successful call + // " + // << "to Initialize()."; + return false; + } + // CHECK(output); + output->clear(); + int input_start = 0; + while (GetNextWindowOfSamples(input, &input_start)) { + // DCHECK_EQ(input_queue_.size(), window_length_); + ProcessCoreFFT(); // Processes input_queue_ to fft_input_output_. + // Add a new slice vector onto the output, to save new result to. + output->resize(output->size() + 1); + // Get a reference to the newly added slice to fill in. + auto& spectrogram_slice = output->back(); + spectrogram_slice.resize(output_frequency_channels_); + for (int i = 0; i < output_frequency_channels_; ++i) { + // This will convert double to float if it needs to. 
+ spectrogram_slice[i] = complex( + fft_input_output_[2 * i], fft_input_output_[2 * i + 1]); + } + } + return true; +} +// Instantiate it four ways: +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, std::vector>>*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector>>*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector>>*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector>>*); + +template +bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, + std::vector>* output) { + if (!initialized_) { + // LOG(ERROR) << "ComputeSquaredMagnitudeSpectrogram() called before " + // << "successful call to Initialize()."; + return false; + } + // CHECK(output); + output->clear(); + int input_start = 0; + while (GetNextWindowOfSamples(input, &input_start)) { + // DCHECK_EQ(input_queue_.size(), window_length_); + ProcessCoreFFT(); // Processes input_queue_ to fft_input_output_. + // Add a new slice vector onto the output, to save new result to. + output->resize(output->size() + 1); + // Get a reference to the newly added slice to fill in. + auto& spectrogram_slice = output->back(); + spectrogram_slice.resize(output_frequency_channels_); + for (int i = 0; i < output_frequency_channels_; ++i) { + // Similar to the Complex case, except storing the norm. + // But the norm function is known to be a performance killer, + // so do it this way with explicit real and imagninary temps. + const double re = fft_input_output_[2 * i]; + const double im = fft_input_output_[2 * i + 1]; + // Which finally converts double to float if it needs to. 
+ spectrogram_slice[i] = re * re + im * im; + } + } + return true; +} +// Instantiate it four ways: +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector>*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector>*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector>*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector>*); + +// Return true if a full window of samples is prepared; manage the queue. +template +bool Spectrogram::GetNextWindowOfSamples(const std::vector& input, + int* input_start) { + auto input_it = input.begin() + *input_start; + int input_remaining = input.end() - input_it; + if (samples_to_next_step_ > input_remaining) { + // Copy in as many samples are left and return false, no full window. + input_queue_.insert(input_queue_.end(), input_it, input.end()); + *input_start += input_remaining; // Increases it to input.size(). + samples_to_next_step_ -= input_remaining; + return false; // Not enough for a full window. + } else { + // Copy just enough into queue to make a new window, then trim the + // front off the queue to make it window-sized. + input_queue_.insert(input_queue_.end(), input_it, + input_it + samples_to_next_step_); + *input_start += samples_to_next_step_; + input_queue_.erase( + input_queue_.begin(), + input_queue_.begin() + input_queue_.size() - window_length_); + // DCHECK_EQ(window_length_, input_queue_.size()); + samples_to_next_step_ = step_length_; // Be ready for next time. + return true; // Yes, input_queue_ now contains exactly a window-full. + } +} + +void Spectrogram::ProcessCoreFFT() { + for (int j = 0; j < window_length_; ++j) { + fft_input_output_[j] = input_queue_[j] * window_[j]; + } + // Zero-pad the rest of the input buffer. 
+ for (int j = window_length_; j < fft_length_; ++j) { + fft_input_output_[j] = 0.0; + } + const int kForwardFFT = 1; // 1 means forward; -1 reverse. + // This real FFT is a fair amount faster than using cdft here. + rdft(fft_length_, kForwardFFT, &fft_input_output_[0], + &fft_integer_working_area_[0], &fft_double_working_area_[0]); + // Make rdft result look like cdft result; + // unpack the last real value from the first position's imag slot. + fft_input_output_[fft_length_] = fft_input_output_[1]; + fft_input_output_[fft_length_ + 1] = 0; + fft_input_output_[1] = 0; +} + +} // namespace internal +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/spectrogram.h b/tensorflow/contrib/lite/kernels/internal/spectrogram.h new file mode 100644 index 0000000000..b77a68f7df --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/spectrogram.h @@ -0,0 +1,110 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Class for generating spectrogram slices from a waveform. +// Initialize() should be called before calls to other functions. Once +// Initialize() has been called and returned true, The Compute*() functions can +// be called repeatedly with sequential input data (ie. the first element of the +// next input vector directly follows the last element of the previous input +// vector). 
Whenever enough audio samples are buffered to produce a +// new frame, it will be placed in output. Output is cleared on each +// call to Compute*(). This class is thread-unsafe, and should only be +// called from one thread at a time. +// With the default parameters, the output of this class should be very +// close to the results of the following MATLAB code: +// overlap_samples = window_length_samples - step_samples; +// window = hann(window_length_samples, 'periodic'); +// S = abs(spectrogram(audio, window, overlap_samples)).^2; + +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_SPECTROGRAM_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_SPECTROGRAM_H_ + +#include +#include +#include + +#include "third_party/fft2d/fft.h" + +namespace tflite { +namespace internal { + +class Spectrogram { + public: + Spectrogram() : initialized_(false) {} + ~Spectrogram() {} + + // Initializes the class with a given window length and step length + // (both in samples). Internally a Hann window is used as the window + // function. Returns true on success, after which calls to Process() + // are possible. window_length must be greater than 1 and step + // length must be greater than 0. + bool Initialize(int window_length, int step_length); + + // Initialize with an explicit window instead of a length. + bool Initialize(const std::vector& window, int step_length); + + // Processes an arbitrary amount of audio data (contained in input) + // to yield complex spectrogram frames. After a successful call to + // Initialize(), Process() may be called repeatedly with new input data + // each time. The audio input is buffered internally, and the output + // vector is populated with as many temporally-ordered spectral slices + // as it is possible to generate from the input. The output is cleared + // on each call before the new frames (if any) are added. + // + // The template parameters can be float or double. 
+ template + bool ComputeComplexSpectrogram( + const std::vector& input, + std::vector>>* output); + + // This function works as the one above, but returns the power + // (the L2 norm, or the squared magnitude) of each complex value. + template + bool ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, + std::vector>* output); + + // Return reference to the window function used internally. + const std::vector& GetWindow() const { return window_; } + + // Return the number of frequency channels in the spectrogram. + int output_frequency_channels() const { return output_frequency_channels_; } + + private: + template + bool GetNextWindowOfSamples(const std::vector& input, + int* input_start); + void ProcessCoreFFT(); + + int fft_length_; + int output_frequency_channels_; + int window_length_; + int step_length_; + bool initialized_; + int samples_to_next_step_; + + std::vector window_; + std::vector fft_input_output_; + std::deque input_queue_; + + // Working data areas for the FFT routines. + std::vector fft_integer_working_area_; + std::vector fft_double_working_area_; +}; + +} // namespace internal +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_SPECTROGRAM_H_ diff --git a/tensorflow/contrib/py2tf/converters/logical_expressions.py b/tensorflow/contrib/py2tf/converters/logical_expressions.py index df980d41c9..766aa11efd 100644 --- a/tensorflow/contrib/py2tf/converters/logical_expressions.py +++ b/tensorflow/contrib/py2tf/converters/logical_expressions.py @@ -23,52 +23,107 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.pyct import anno +from tensorflow.contrib.py2tf.pyct import templates +from tensorflow.contrib.py2tf.pyct import transformer -class LogicalExpressionTransformer(gast.NodeTransformer): +# TODO(mdan): Properly extrack boolean ops according to lazy eval rules. 
+# Note that this isn't completely safe either, because tensors may have control +# dependencies. +# Note that for loops that should be done after the loop was converted to +# tf.while_loop so that the expanded conditionals are properly scoped. + +# Used to signal that an operand is safe for non-lazy evaluation. +SAFE_BOOLEAN_OPERAND = 'SAFE_BOOLEAN_OPERAND' + + +class LogicalExpressionTransformer(transformer.Base): """Converts logical expressions to corresponding TF calls.""" - def __init__(self): + def __init__(self, context): + super(LogicalExpressionTransformer, self).__init__(context) # TODO(mdan): Look into replacing with bitwise operators instead. self.op_mapping = { - gast.And: 'tf.logical_and', - gast.Or: 'tf.logical_or', - gast.Not: 'tf.logical_not', - gast.Eq: 'tf.equal', + gast.And: 'logical_and', + gast.Eq: 'equal', + gast.Gt: 'greater', + gast.GtE: 'greater_equal', + gast.Lt: 'less', + gast.LtE: 'less_equal', + gast.Not: 'logical_not', + gast.NotEq: 'not_equal', + gast.Or: 'logical_or', + gast.USub: 'negative', } + def _expect_simple_symbol(self, operand): + if isinstance(operand, gast.Name): + return + if anno.hasanno(operand, SAFE_BOOLEAN_OPERAND): + return + raise NotImplementedError( + 'only simple local variables are supported in logical and compound ' + 'comparison expressions; for example, we support "a or b" but not ' + '"a.x or b"; for a workaround, assign the expression to a local ' + 'variable and use that instead, for example "tmp = a.x", "tmp or b"') + + def _matching_tf_op(self, operator): + op_type = type(operator) + mapped_op = self.op_mapping.get(op_type) + if not mapped_op: + raise NotImplementedError('operator %s is not yet supported' % op_type) + return mapped_op + + def _inline_tf_op(self, op_name, args): + template = """ + tf.op_name(args) + """ + replacement = templates.replace(template, op_name=op_name, args=args) + # It's a body with a single expression, we want its value. 
+ n = replacement[0].value + anno.setanno(n, SAFE_BOOLEAN_OPERAND, True) + return n + def visit_Compare(self, node): node = self.generic_visit(node) - if len(node.ops) > 1: - raise NotImplementedError() - cmp_type = type(node.ops[0]) - if cmp_type in self.op_mapping: - tf_function = parser.parse_str(self.op_mapping[cmp_type]).body[0].value - return gast.Call( - func=tf_function, args=[node.left, node.comparators[0]], keywords=[]) - return node + ops_and_comps = list(zip(node.ops, node.comparators)) + left = node.left + op_tree = None + + # Repeated comparisons are converted to conjunctions: + # a < b < c -> a < b and b < c + while ops_and_comps: + op, right = ops_and_comps.pop(0) + binary_comparison = self._inline_tf_op(self._matching_tf_op(op), + (left, right)) + if isinstance(left, gast.Name) and isinstance(right, gast.Name): + anno.setanno(binary_comparison, SAFE_BOOLEAN_OPERAND, True) + if op_tree: + self._expect_simple_symbol(right) + op_tree = self._inline_tf_op('logical_and', + (binary_comparison, op_tree)) + else: + op_tree = binary_comparison + left = right + assert op_tree is not None + return op_tree def visit_UnaryOp(self, node): node = self.generic_visit(node) - if isinstance(node.op, gast.Not): - tf_function = parser.parse_str(self.op_mapping[type( - node.op)]).body[0].value - node = gast.Call(func=tf_function, args=[node.operand], keywords=[]) - return node + return self._inline_tf_op(self._matching_tf_op(node.op), node.operand) def visit_BoolOp(self, node): - # TODO(mdan): A normalizer may be useful here. Use ANF? 
node = self.generic_visit(node) - tf_function = parser.parse_str(self.op_mapping[type(node.op)]).body[0].value - left = node.values[0] - for i in range(1, len(node.values)): - left = gast.Call( - func=tf_function, args=[left, node.values[i]], keywords=[]) - return left - - -def transform(node): - transformer = LogicalExpressionTransformer() - node = transformer.visit(node) - return node + node_values = node.values + right = node.values.pop() + self._expect_simple_symbol(right) + while node_values: + left = node_values.pop() + self._expect_simple_symbol(left) + right = self._inline_tf_op(self._matching_tf_op(node.op), (left, right)) + return right + + +def transform(node, context): + return LogicalExpressionTransformer(context).visit(node) diff --git a/tensorflow/contrib/py2tf/converters/logical_expressions_test.py b/tensorflow/contrib/py2tf/converters/logical_expressions_test.py index a28326c517..eb28c309a4 100644 --- a/tensorflow/contrib/py2tf/converters/logical_expressions_test.py +++ b/tensorflow/contrib/py2tf/converters/logical_expressions_test.py @@ -32,7 +32,7 @@ class GradientsFunctionTest(converter_test_base.TestCase): return a == b node = self.parse_and_analyze(test_fn, {}) - node = logical_expressions.transform(node) + node = logical_expressions.transform(node, self.ctx) with self.compiled(node, math_ops.equal) as result: with self.test_session() as sess: @@ -45,7 +45,7 @@ class GradientsFunctionTest(converter_test_base.TestCase): return (a or b) and (a or b or c) node = self.parse_and_analyze(test_fn, {}) - node = logical_expressions.transform(node) + node = logical_expressions.transform(node, self.ctx) with self.compiled(node, math_ops.logical_or, math_ops.logical_and) as result: diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index d95469ea53..c6f4988375 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -312,7 +312,7 @@ def 
node_to_graph(node, ctx, nocompile_decorators): # control_flow may create new symbols and change scopes. node = _static_analysis_pass(node, ctx) - node = logical_expressions.transform(node) + node = logical_expressions.transform(node, ctx) node = side_effect_guards.transform(node, ctx) node = name_scopes.transform(node, ctx) diff --git a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc index f8de8baa65..7bf5c21d0b 100644 --- a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc +++ b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc @@ -191,6 +191,7 @@ REGISTER_OP("ConfigureDistributedTPU") .Output("topology: string") .Attr("embedding_config: string = ''") .Attr("tpu_embedding_config: string = ''") + .Attr("is_global_init: bool = false") .SetIsStateful() .SetShapeFn(shape_inference::UnknownShape) .Doc(R"doc( @@ -202,6 +203,7 @@ topology. tpu_embedding_config: Serialized tensorflow.tpu.TPUEmbeddingConfiguration that describes the embedding lookups of the program. embedding_config: Reserved. Do not use. +is_global_init: Reserved. Do not use. 
)doc"); REGISTER_OP("ShutdownDistributedTPU") diff --git a/tensorflow/core/common_runtime/graph_runner.cc b/tensorflow/core/common_runtime/graph_runner.cc index f1082a6003..1125d2a34a 100644 --- a/tensorflow/core/common_runtime/graph_runner.cc +++ b/tensorflow/core/common_runtime/graph_runner.cc @@ -97,7 +97,9 @@ class SimpleRendezvous : public Rendezvous { } // namespace -GraphRunner::GraphRunner(Env* env) : cpu_device_(GetCPUDevice(env)) {} +GraphRunner::GraphRunner(Env* env) + : device_deleter_(GetCPUDevice(env)), device_(device_deleter_.get()) {} +GraphRunner::GraphRunner(Device* device) : device_(device) {} GraphRunner::~GraphRunner() {} @@ -105,17 +107,18 @@ Status GraphRunner::Run(Graph* graph, FunctionLibraryRuntime* function_library, const NamedTensorList& inputs, const std::vector& output_names, std::vector* outputs) { - if (cpu_device_ == nullptr) { + if (device_ == nullptr) { return errors::NotFound("Cannot find a device for GraphRunner."); } if (function_library && function_library->device() && - function_library->device()->device_type() != cpu_device_->device_type()) { - // We are running on a CPU but the function library is for a non-CPU device, - // so just ignore the function_library. + function_library->device()->device_type() != device_->device_type()) { + // Mismatch between function_library's device_type and device_'s + // device_type. // TODO(matthewmurray) Can we create a new FunctionLibraryRuntime that is - // identical to function_library except that it uses CPU? - VLOG(1) << "Cannot run on CPU device with a function library for a " + // identical to function_library except that it uses the given 'device_'? 
+ VLOG(1) << "Cannot run on: " << device_->device_type() + << " with a function library for a " << function_library->device()->device_type() << " device."; function_library = nullptr; } @@ -146,8 +149,7 @@ Status GraphRunner::Run(Graph* graph, FunctionLibraryRuntime* function_library, subgraph::RewriteGraphMetadata metadata; TF_RETURN_IF_ERROR(subgraph::RewriteGraphForExecution( graph_to_run.get(), input_names, output_names, {} /* target nodes */, - cpu_device_->attributes(), false /* use_function_convention */, - &metadata)); + device_->attributes(), false /* use_function_convention */, &metadata)); // Create the local executor and the Rendezvous for fetching back the // constants. @@ -158,13 +160,12 @@ Status GraphRunner::Run(Graph* graph, FunctionLibraryRuntime* function_library, LocalExecutorParams params; // The ownership of the output tensors are bound to this device's lifetime. - params.device = cpu_device_.get(); + params.device = device_; params.function_library = function_library; const int producer = graph_to_run->versions().producer(); params.create_kernel = [this, producer](const NodeDef& ndef, OpKernel** kernel) { - return CreateNonCachedKernel(cpu_device_.get(), nullptr, ndef, producer, - kernel); + return CreateNonCachedKernel(device_, nullptr, ndef, producer, kernel); }; params.delete_kernel = [](OpKernel* kernel) { delete kernel; }; diff --git a/tensorflow/core/common_runtime/graph_runner.h b/tensorflow/core/common_runtime/graph_runner.h index 1e4ae77227..1c4b2b719c 100644 --- a/tensorflow/core/common_runtime/graph_runner.h +++ b/tensorflow/core/common_runtime/graph_runner.h @@ -36,12 +36,14 @@ namespace tensorflow { // This class is only meant for internal use where one needs to // partially evaluate inexpensive nodes in a graph, such as for shape // inference or for constant folding. Because of its limited, simple -// use-cases, it executes all computation on the CPU and is not meant -// to be particularly lightweight, fast, or efficient. 
+// use-cases, it executes all computation on the given device (CPU by default) +// and is not meant to be particularly lightweight, fast, or efficient. class GraphRunner { public: // REQUIRES: `env` is not nullptr. GraphRunner(Env* env); + // REQUIRES: 'device' is not nullptr. Not owned. + GraphRunner(Device* device); ~GraphRunner(); // Function semantics for `inputs`, `output_names` and `outputs` @@ -59,7 +61,8 @@ class GraphRunner { std::vector* outputs); private: - std::unique_ptr cpu_device_; + std::unique_ptr device_deleter_; + Device* const device_; }; } // namespace tensorflow -- GitLab From 05a264fdf55dcd9763d43804c71f35d8c160a5a5 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 2 Mar 2018 19:18:49 -0800 Subject: [PATCH 290/311] tfdbg: Add link to TensorBoard Debugger Plugin from the CLI documentation RELNOTES: tfdbg: TensorFlow Debugger's graphical user interface (GUI), the [TensorBoard Debugger Plugin](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md), is now in alpha. PiperOrigin-RevId: 187700265 --- .../docs_src/programmers_guide/debugger.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index c8fdae6f60..5fb1c2da88 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -23,8 +23,13 @@ debuggers such as Python's `pdb` due to TensorFlow's computation-graph paradigm. > installed using `pip install .whl`, however curses on Windows > may not work as reliably as curses on Linux or Mac. -This tutorial demonstrates how to use the **tfdbg** command-line interface -(CLI) to debug the appearance of [`nan`s](https://en.wikipedia.org/wiki/NaN) +> NOTE: This guide focuses on the command-line interface (CLI) of tfdbg. 
For a +> guide on how to use the graphical user interface (GUI) of tfdbg, i.e., the +> **TensorBoard Debugger Plugin**, please visit +> [its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). + +This tutorial demonstrates how to use the **tfdbg** CLI to debug the appearance +of [`nan`s](https://en.wikipedia.org/wiki/NaN) and [`inf`s](https://en.wikipedia.org/wiki/Infinity), a frequently-encountered type of bug in TensorFlow model development. The following example is for users who use the low-level @@ -806,3 +811,13 @@ sess.run(b) the constant-folding would not occur and `tfdbg` should show the intermediate tensor dumps. + +**Q**: Is there a GUI for tfdbg? + +**A**: Yes, the **TensorBoard Debugger Plugin** is the GUI of tfdbg. + It offers features such as inspection of the computation graph, + real-time visualization of tensor values, continuation to tensor + and conditional breakpoints, and tying tensors to their + graph-construction source code, all in the browser environment. + To get started, please visit + [its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). -- GitLab From c645201fa9861dc9e0555a693a04e503ed40d01a Mon Sep 17 00:00:00 2001 From: Michael Case Date: Sat, 3 Mar 2018 10:04:35 -0800 Subject: [PATCH 291/311] Internal Change. PiperOrigin-RevId: 187738384 --- .../tools/integration_tests/gcs_smoke_test/{BUILD.bazel => BUILD} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tensorflow/tools/integration_tests/gcs_smoke_test/{BUILD.bazel => BUILD} (100%) diff --git a/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel b/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD similarity index 100% rename from tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel rename to tensorflow/tools/integration_tests/gcs_smoke_test/BUILD -- GitLab From 421077f6ec9af420c9f11d6cff15ef6e0b21104d Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 3 Mar 2018 14:26:21 -0800 Subject: [PATCH 292/311] Will open source this part of code. PiperOrigin-RevId: 187747019 --- tensorflow/contrib/framework/__init__.py | 2 + tensorflow/python/kernel_tests/BUILD | 3 + .../python/kernel_tests/init_ops_test.py | 79 +++++++++++++++++++ tensorflow/python/ops/init_ops.py | 58 +++++++++++++- 4 files changed, 141 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 8063250091..21f9651318 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -71,6 +71,7 @@ See the @{$python/contrib.framework} guide. @@model_variable @@variable @@VariableDeviceChooser +@@convolutional_delta_orthogonal @@zero_initializer @@load_checkpoint @@ -111,6 +112,7 @@ from tensorflow.python.framework.smart_cond import smart_cond from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec +from tensorflow.python.ops.init_ops import convolutional_delta_orthogonal from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['nest'] diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 0f13e8bba5..23b79a24c0 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1571,12 +1571,15 @@ cuda_py_test( "//third_party/py/numpy", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", + "//tensorflow/python:layers", "//tensorflow/python:framework", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:init_ops", + "//tensorflow/python:linalg_ops", "//tensorflow/python:math_ops", "//tensorflow/python:nn_ops", "//tensorflow/python:partitioned_variables", + "//tensorflow/python:random_ops", "//tensorflow/python:variable_scope", 
"//tensorflow/python:variables", ], diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index 19a7d2f9d5..c1755985ee 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -25,10 +25,13 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed +from tensorflow.python.layers import convolutional from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops +from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -571,6 +574,82 @@ class OrthogonalInitializerTest(test.TestCase): np.dot(t, t.T), np.eye(t.shape[0]), rtol=tol, atol=tol) +class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase): + + def testInitializerIdentical(self): + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_delta_orthogonal(seed=1, dtype=dtype) + init2 = init_ops.convolutional_delta_orthogonal(seed=1, dtype=dtype) + self.assertTrue(identicaltest(self, init1, init2, (3, 3, 10, 10))) + + def testInitializerDifferent(self): + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_delta_orthogonal(seed=1, dtype=dtype) + init2 = init_ops.convolutional_delta_orthogonal(seed=2, dtype=dtype) + self.assertFalse(identicaltest(self, init1, init2, (3, 3, 10, 10))) + + def testDuplicatedInitializer(self): + init = init_ops.convolutional_delta_orthogonal() + self.assertFalse(duplicated_initializer(self, init, 1, (3, 3, 10, 10))) + + def testInvalidDataType(self): + self.assertRaises( + ValueError, 
init_ops.convolutional_delta_orthogonal, + dtype=dtypes.string) + + def testInvalidShape(self): + init1 = init_ops.convolutional_delta_orthogonal() + with self.test_session(graph=ops.Graph(), use_gpu=True): + self.assertRaises(ValueError, init1, shape=[3, 3, 6, 5]) + + def testGain(self): + shape = (3, 3, 10, 10) + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_delta_orthogonal(seed=1, dtype=dtype) + init2 = init_ops.convolutional_delta_orthogonal(gain=3.14, + seed=1, dtype=dtype) + with self.test_session(graph=ops.Graph(), use_gpu=True): + t1 = init1(shape).eval() + with self.test_session(graph=ops.Graph(), use_gpu=True): + t2 = init2(shape).eval() + return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15) + + def testShapesValues(self): + for dtype in [dtypes.float32]: + for kernel_size in [[3], [8], [3, 5], [2, 4], [3, 3, 3], [2, 2, 2]]: + tol = 1e-2 + # Check orthogonality by computing the 2-norms of the inputs and outputs. + if len(kernel_size) == 1: + shape = [4, 32, 64] + convolution = convolutional.conv1d + elif len(kernel_size) == 2: + convolution = convolutional.conv2d + shape = [4, 32, 32, 64] + else: + shape = [4, 16, 16, 16, 64] + convolution = convolutional.conv3d + inputs = random_ops.random_normal(shape, dtype=dtype) + inputs_2norm = linalg_ops.norm(inputs) + outputs = convolution( + inputs, padding="same", filters=128, + kernel_size=kernel_size, use_bias=False, + kernel_initializer=init_ops.convolutional_delta_orthogonal( + gain=3.14)) + outputs_shape = shape[0:-1] + [128] + outputs_2norm = linalg_ops.norm(outputs) + my_ops = variables.global_variables_initializer() + with self.test_session(use_gpu=True) as sess: + sess.run(my_ops) + # Check the shape of the outputs + t = outputs.eval() + self.assertAllEqual(t.shape, outputs_shape) + # Check isometry of the delta-orthogonal kernel. 
+ self.assertAllClose( + sess.run(inputs_2norm)/np.sqrt(np.prod(shape)), + sess.run(outputs_2norm)/(np.sqrt(np.prod(shape))*np.sqrt(3.14)), + rtol=tol, atol=tol) + + class IdentityInitializerTest(test.TestCase): def testInvalidDataType(self): diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index c7502d0fda..40ab22951b 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -542,6 +542,62 @@ class Orthogonal(Initializer): return {"gain": self.gain, "seed": self.seed, "dtype": self.dtype.name} +class ConvolutionDeltaOrthogonal(Initializer): + """Initializer that generates a delta orthogonal kernel for ConvNets. + + The shape of the tensor must have length 3, 4 or 5. The number of input + filters must not exceed the number of output filters. The center pixels of the + tensor form an orthogonal matrix. Other pixels are set to be zero. + + Args: + gain: multiplicative factor to apply to the orthogonal matrix. Default is 1. + The 2-norm of an input is multiplied by a factor of 'sqrt(gain)' after + applying this convolution. + dtype: The type of the output. + seed: A Python integer. Used to create random seeds. See + @{tf.set_random_seed} + for behavior. 
+ """ + + def __init__(self, gain=1.0, seed=None, dtype=dtypes.float32): + self.gain = gain + self.dtype = _assert_float_dtype(dtypes.as_dtype(dtype)) + self.seed = seed + + def __call__(self, shape, dtype=None, partition_info=None): + if dtype is None: + dtype = self.dtype + # Check the shape + if len(shape) < 3 or len(shape) > 5: + raise ValueError("The tensor to initialize must be at least " + "three-dimensional and at most five-dimensional") + + if shape[-2] > shape[-1]: + raise ValueError("In_filters cannot be greater than out_filters.") + + # Generate a random matrix + a = random_ops.random_normal([shape[-1], shape[-1]], + dtype=dtype, seed=self.seed) + # Compute the qr factorization + q, _ = linalg_ops.qr(a, full_matrices=False) + q = q[:shape[-2], :] + q *= math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype)) + if len(shape) == 3: + weight = array_ops.scatter_nd([[(shape[0]-1)//2]], + array_ops.expand_dims(q, 0), shape) + elif len(shape) == 4: + weight = array_ops.scatter_nd([[(shape[0]-1)//2, (shape[1]-1)//2]], + array_ops.expand_dims(q, 0), shape) + else: + weight = array_ops.scatter_nd([[(shape[0]-1)//2, (shape[1]-1)//2, + (shape[2]-1)//2]], + array_ops.expand_dims(q, 0), shape) + return weight + + def get_config(self): + return {"gain": self.gain, "seed": self.seed, "dtype": self.dtype.name} + + @tf_export("keras.initializers.Identity", "initializers.identity") class Identity(Initializer): """Initializer that generates the identity matrix. @@ -586,7 +642,7 @@ uniform_unit_scaling_initializer = UniformUnitScaling variance_scaling_initializer = VarianceScaling orthogonal_initializer = Orthogonal identity_initializer = Identity - +convolutional_delta_orthogonal = ConvolutionDeltaOrthogonal # pylint: enable=invalid-name -- GitLab From f80aaf1a3cc8da73f862b0c7218f9d8d98d2cf7a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 3 Mar 2018 15:49:05 -0800 Subject: [PATCH 293/311] Internal change. 
PiperOrigin-RevId: 187749767 --- .../contrib/lite/kernels/internal/quantization_util.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util.h b/tensorflow/contrib/lite/kernels/internal/quantization_util.h index ba06bc0975..b84d2f9ee1 100644 --- a/tensorflow/contrib/lite/kernels/internal/quantization_util.h +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef PHOTOS_VISION_LEARNING_TENSORFLOW_MINI_QUANTIZATION_UTIL_H_ -#define PHOTOS_VISION_LEARNING_TENSORFLOW_MINI_QUANTIZATION_UTIL_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ #include @@ -63,4 +63,4 @@ int CalculateInputRadius(int input_integer_bits, int input_left_shift); } // namespace tflite -#endif // PHOTOS_VISION_LEARNING_TENSORFLOW_MINI_QUANTIZATION_UTIL_H_ +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ -- GitLab From 70bdb2959a8d10cd6357ba66d5273e6fc7aa0ac1 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Sat, 3 Mar 2018 18:31:07 -0800 Subject: [PATCH 294/311] Fix broken links in docs. 
PiperOrigin-RevId: 187755567 --- tensorflow/docs_src/install/install_sources.md | 3 +-- tensorflow/docs_src/install/install_windows.md | 3 +-- tensorflow/docs_src/mobile/android_build.md | 4 ++-- tensorflow/docs_src/mobile/optimizing.md | 4 ++-- tensorflow/docs_src/programmers_guide/faq.md | 3 +-- tensorflow/docs_src/programmers_guide/graphs.md | 5 ++--- tensorflow/docs_src/tutorials/layers.md | 3 +-- 7 files changed, 10 insertions(+), 15 deletions(-) diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 8d83e9f119..acf0af0d9d 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -393,8 +393,7 @@ TensorFlow programs:

Hello, TensorFlow!
-If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with -TensorFlow}. +If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}. If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index dedf485f93..f0a30ee394 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -153,8 +153,7 @@ TensorFlow programs:
Hello, TensorFlow!
-If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with -TensorFlow}. +If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}. If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). diff --git a/tensorflow/docs_src/mobile/android_build.md b/tensorflow/docs_src/mobile/android_build.md index b5a1d5d7d1..08a5fbe41c 100644 --- a/tensorflow/docs_src/mobile/android_build.md +++ b/tensorflow/docs_src/mobile/android_build.md @@ -90,8 +90,8 @@ using [ADB](https://developer.android.com/studio/command-line/adb.html). This requires some knowledge of build systems and Android developer tools, but we'll guide you through the basics here. -- First, follow our instructions for @{$install/install_sources$installing from - sources}. This will also guide you through installing Bazel and cloning the +- First, follow our instructions for @{$install/install_sources$installing from sources}. + This will also guide you through installing Bazel and cloning the TensorFlow code. - Download the Android [SDK](https://developer.android.com/studio/index.html) diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md index 44cacff5db..ca9cb043e9 100644 --- a/tensorflow/docs_src/mobile/optimizing.md +++ b/tensorflow/docs_src/mobile/optimizing.md @@ -290,8 +290,8 @@ run it on a 64-bit ARM device: You can interpret the results in exactly the same way as the desktop version above. 
If you have any trouble figuring out what the right input and output -names and types are, take a look at the @{$mobile/prepare_models$Preparing -models} page for details about detecting these for your model, and look at the +names and types are, take a look at the @{$mobile/prepare_models$Preparing models} +page for details about detecting these for your model, and look at the `summarize_graph` tool which may give you helpful information. diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md index 70931f2862..1548d43877 100644 --- a/tensorflow/docs_src/programmers_guide/faq.md +++ b/tensorflow/docs_src/programmers_guide/faq.md @@ -159,8 +159,7 @@ available. These operations allow you to build sophisticated @{$reading_data$input pipelines}, at the cost of making the TensorFlow computation somewhat more complicated. See the how-to documentation for -@{$reading_data#creating-threads-to-prefetch-using-queuerunner-objects$using -`QueueRunner` objects to drive queues and readers} +@{$reading_data#creating-threads-to-prefetch-using-queuerunner-objects$using `QueueRunner` objects to drive queues and readers} for more information on how to use them. ## Variables diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index 9049a5a9f3..ab2ce9af2e 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -210,9 +210,8 @@ with tf.device("/device:GPU:0"): # Operations created in this context will be pinned to the GPU. 
result = tf.matmul(weights, img) ``` - -If you are deploying TensorFlow in a @{$deploy/distributed$typical distributed -configuration}, you might specify the job name and task ID to place variables on +If you are deploying TensorFlow in a @{$deploy/distributed$typical distributed configuration}, +you might specify the job name and task ID to place variables on a task in the parameter server job (`"/job:ps"`), and the other operations on task in the worker job (`"/job:worker"`): diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index 5111b16247..ee03f440c9 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -625,8 +625,7 @@ operation earlier when we generated the probabilities in `cnn_model_fn`. > Note: If you don't explicitly assign a name to an operation via the `name` > argument, TensorFlow will assign a default name. A couple easy ways to > discover the names applied to operations are to visualize your graph on -> @{$graph_viz$TensorBoard}) or to enable the @{$debugger$TensorFlow Debugger -> (tfdbg)}. +> @{$graph_viz$TensorBoard}) or to enable the @{$debugger$TensorFlow Debugger (tfdbg)}. Next, we create the `LoggingTensorHook`, passing `tensors_to_log` to the `tensors` argument. We set `every_n_iter=50`, which specifies that probabilities -- GitLab From be63d928eef26d3ea52c31147d49f6ae4032ac39 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Sat, 3 Mar 2018 22:12:24 -0800 Subject: [PATCH 295/311] Fix nested bullets in docs. 
(Need 4 spaces indent) PiperOrigin-RevId: 187763978 --- tensorflow/docs_src/get_started/custom_estimators.md | 10 +++++----- tensorflow/docs_src/programmers_guide/datasets.md | 4 ++-- tensorflow/docs_src/programmers_guide/graphs.md | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/get_started/custom_estimators.md index 42a246678a..185917baae 100644 --- a/tensorflow/docs_src/get_started/custom_estimators.md +++ b/tensorflow/docs_src/get_started/custom_estimators.md @@ -164,9 +164,9 @@ To implement a typical model function, you must do the following: * [Define the model](#define_the_model). * Specify additional calculations for each of the [three different modes](#modes): - * [Predict](#predict) - * [Evaluate](#evaluate) - * [Train](#train) + * [Predict](#predict) + * [Evaluate](#evaluate) + * [Train](#train) ## Define the model @@ -546,8 +546,8 @@ In brief, here's what the three graphs tell you: * accuracy: The accuracy is recorded by the following two lines: - * `eval_metric_ops={'my_accuracy': accuracy})`, during evaluation. - * `tf.summary.scalar('accuracy', accuracy[1])`, during training. + * `eval_metric_ops={'my_accuracy': accuracy})`, during evaluation. + * `tf.summary.scalar('accuracy', accuracy[1])`, during training. These tensorboard graphs are one of the main reasons it's important to pass a `global_step` to your optimizer's `minimize` method. The model can't record diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index d38fbddfa1..9ccdbde627 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -18,11 +18,11 @@ The `tf.data` API introduces two new abstractions to TensorFlow: tensors representing the image data and a label. There are two distinct ways to create a dataset: - * Creating a **source** (e.g. 
`Dataset.from_tensor_slices()`) constructs a + * Creating a **source** (e.g. `Dataset.from_tensor_slices()`) constructs a dataset from one or more `tf.Tensor` objects. - * Applying a **transformation** (e.g. `Dataset.batch()`) constructs a dataset + * Applying a **transformation** (e.g. `Dataset.batch()`) constructs a dataset from one or more `tf.data.Dataset` objects. * A `tf.data.Iterator` provides the main way to extract elements from a diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index ab2ce9af2e..e69b717432 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -335,20 +335,20 @@ described below. controls the behavior of the session. For example, some of the configuration options include: - * `allow_soft_placement`. Set this to `True` to enable a "soft" device + * `allow_soft_placement`. Set this to `True` to enable a "soft" device placement algorithm, which ignores @{tf.device} annotations that attempt to place CPU-only operations on a GPU device, and places them on the CPU instead. - * `cluster_def`. When using distributed TensorFlow, this option allows you + * `cluster_def`. When using distributed TensorFlow, this option allows you to specify what machines to use in the computation, and provide a mapping between job names, task indices, and network addresses. See @{tf.train.ClusterSpec.as_cluster_def} for details. - * `graph_options.optimizer_options`. Provides control over the optimizations + * `graph_options.optimizer_options`. Provides control over the optimizations that TensorFlow performs on your graph before executing it. - * `gpu_options.allow_growth`. Set this to `True` to change the GPU memory + * `gpu_options.allow_growth`. Set this to `True` to change the GPU memory allocator so that it gradually increases the amount of memory allocated, rather than allocating most of the memory at startup. 
-- GitLab From 806d504bbae0a7133578e85ace8b4d5779ee748f Mon Sep 17 00:00:00 2001 From: Patrick Nguyen Date: Sun, 4 Mar 2018 13:47:57 -0800 Subject: [PATCH 296/311] Prevent accidental re-use of removed field. PiperOrigin-RevId: 187798953 --- tensorflow/core/framework/function.proto | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/framework/function.proto b/tensorflow/core/framework/function.proto index bd01e86da3..72e3c43831 100644 --- a/tensorflow/core/framework/function.proto +++ b/tensorflow/core/framework/function.proto @@ -30,7 +30,8 @@ message FunctionDef { // Attributes specific to this function definition. map<string, AttrValue> attr = 5; - // NOTE: field id 2 deleted on Jan 11, 2016, GraphDef version 21. + // NOTE: field id 2 deleted on Jan 11, 2017, GraphDef version 21. + reserved 2; // In both of the following fields, there is the need to specify an // output that is used as either the input to another node (in -- GitLab From 3963f0dae63dfc0383a86168bb4595d27768c9f8 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Sun, 4 Mar 2018 21:51:29 -0800 Subject: [PATCH 297/311] Correct reporter name.
(#17425) --- SECURITY.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index fea24b2739..93b25cd3bb 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -233,7 +233,7 @@ v//Fw6ZeY+HmRDFdirjD7wXtIuER4vqCryIqR6Xe9X8oJXz9L/Jhslc= ### Known vulnerabilities -| Type | Versions affected | Reported by | Additional Information | -|-------------------|:-----------------:|--------------------|-----------------------------| -| out of bounds read| <=1.4 | TenCent Blade Team | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | +| Type | Versions affected | Reported by | Additional Information | +|-------------------|:-----------------:|-----------------------|-----------------------------| +| out of bounds read| <=1.4 | Blade Team of TenCent | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | -- GitLab From 2a4930b7fe3e725bacfda2ab80b17f731deecc50 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Sun, 4 Mar 2018 22:27:44 -0800 Subject: [PATCH 298/311] Correct capitalization --- SECURITY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SECURITY.md b/SECURITY.md index 93b25cd3bb..9f252e6818 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -235,5 +235,5 @@ v//Fw6ZeY+HmRDFdirjD7wXtIuER4vqCryIqR6Xe9X8oJXz9L/Jhslc= | Type | Versions affected | Reported by | Additional Information | |-------------------|:-----------------:|-----------------------|-----------------------------| -| out of bounds read| <=1.4 | Blade Team of TenCent | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | +| out of bounds read| <=1.4 | Blade Team of Tencent | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | -- GitLab From c3206ba3f331f135e26156c72eaabdaa5c8c2883 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 5 Mar 2018 02:45:58 -0800 Subject: [PATCH 299/311] Adds checks to tf.nn.sparse_softmax_cross_entropy_with_logits to make sure that shapes for labels and logits (except last dimension) match. First, the static dimensions are checked, and only if the result is inconclusive a dynamic check is added. In sparse_softmax_cross_entropy_with_logits the input dimensions are flattened, which can lead to unexpected bugs if the order of dimensions does not match (e.g. if one is time-major and the other is batch-major). This prevents such mistakes. PiperOrigin-RevId: 187841750 --- .../python/estimator/canned/head_test.py | 7 ++- tensorflow/python/ops/nn_ops.py | 47 +++++++++++++------ 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index a300f315c1..23158c76e7 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -300,7 +300,12 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): features = {'x': values_2x3} # Static shape. - with self.assertRaisesRegexp(ValueError, 'Dimensions must be equal'): + with self.assertRaisesRegexp( + ValueError, + r'Shape mismatch: The shape of labels \(received \(3,\)\) should equal ' + r'the shape of logits except for the last dimension ' + r'\(received \(2, 3\)\)\.' 
+ ): head.create_loss( features=features, mode=model_fn.ModeKeys.EVAL, diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index a0d500afce..852ab365bb 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -29,6 +29,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops @@ -2025,6 +2026,9 @@ def sparse_softmax_cross_entropy_with_logits( # Store label shape for result later. labels_static_shape = labels.get_shape() labels_shape = array_ops.shape(labels) + static_shapes_fully_defined = ( + labels_static_shape.is_fully_defined() and + logits.get_shape()[:-1].is_fully_defined()) if logits.get_shape().ndims is not None and logits.get_shape().ndims == 0: raise ValueError( "Logits cannot be scalars - received shape %s." % logits.get_shape()) @@ -2034,6 +2038,12 @@ def sparse_softmax_cross_entropy_with_logits( raise ValueError("Rank mismatch: Rank of labels (received %s) should " "equal rank of logits minus 1 (received %s)." % (labels_static_shape.ndims, logits.get_shape().ndims)) + if (static_shapes_fully_defined and + labels_static_shape != logits.get_shape()[:-1]): + raise ValueError("Shape mismatch: The shape of labels (received %s) " + "should equal the shape of logits except for the last " + "dimension (received %s)." % (labels_static_shape, + logits.get_shape())) # Check if no reshapes are required. if logits.get_shape().ndims == 2: cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( @@ -2043,20 +2053,29 @@ def sparse_softmax_cross_entropy_with_logits( else: return cost - # Reshape logits to 2 dim, labels to 1 dim. 
- num_classes = array_ops.shape(logits)[array_ops.rank(logits) - 1] - precise_logits = array_ops.reshape(precise_logits, [-1, num_classes]) - labels = array_ops.reshape(labels, [-1]) - # The second output tensor contains the gradients. We use it in - # _CrossEntropyGrad() in nn_grad but not here. - cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( - precise_logits, labels, name=name) - cost = array_ops.reshape(cost, labels_shape) - cost.set_shape(labels_static_shape) - if logits.dtype == dtypes.float16: - return math_ops.cast(cost, dtypes.float16) - else: - return cost + # Perform a check of the dynamic shapes if the static shapes are not fully + # defined. + shape_checks = [] + if not static_shapes_fully_defined: + shape_checks.append( + check_ops.assert_equal( + array_ops.shape(labels), + array_ops.shape(logits)[:-1])) + with ops.control_dependencies(shape_checks): + # Reshape logits to 2 dim, labels to 1 dim. + num_classes = array_ops.shape(logits)[array_ops.rank(logits) - 1] + precise_logits = array_ops.reshape(precise_logits, [-1, num_classes]) + labels = array_ops.reshape(labels, [-1]) + # The second output tensor contains the gradients. We use it in + # _CrossEntropyGrad() in nn_grad but not here. + cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( + precise_logits, labels, name=name) + cost = array_ops.reshape(cost, labels_shape) + cost.set_shape(labels_static_shape) + if logits.dtype == dtypes.float16: + return math_ops.cast(cost, dtypes.float16) + else: + return cost @tf_export("nn.avg_pool") -- GitLab From 386ce8080a4ab541bcade08121f679913e85720a Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 5 Mar 2018 05:10:40 -0800 Subject: [PATCH 300/311] [XLA] Minor comment fixes in instruction_fusion.cc. No functional change. 
PiperOrigin-RevId: 187852483 --- tensorflow/compiler/xla/service/instruction_fusion.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index f494748e17..d69ad80bdb 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -302,7 +302,7 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) { // Consider each operand of this instruction for fusion into this // instruction. We want to consider the operands in a particular order to - // avoid created duplicate instruction clones in the fusion instruction. + // avoid creating duplicate instruction clones in the fusion instruction. // For example, consider the following expression: // // A = ... @@ -377,7 +377,7 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) { changed = true; if (operand->user_count() == 0) { - // Operand is now dead. Remove from post order by setting it's + // Operand is now dead. Remove from post order by setting its // location to nullptr. post_order[FindOrDie(post_order_index, operand)] = nullptr; post_order_index.erase(operand); -- GitLab From d0713d3459d3b101d3fba4ac422fae7f2c1b07a5 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 5 Mar 2018 05:18:24 -0800 Subject: [PATCH 301/311] Automated g4 rollback of changelist 185073515 PiperOrigin-RevId: 187852929 --- tensorflow/contrib/bayesflow/BUILD | 2 +- .../kernel_tests/halton_sequence_test.py | 101 +++++++-- .../python/ops/halton_sequence_impl.py | 201 +++++++++++++----- 3 files changed, 234 insertions(+), 70 deletions(-) diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 3592cff90b..5fdcbffb4d 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -190,7 +190,7 @@ cuda_py_test( cuda_py_test( name = "halton_sequence_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/halton_sequence_test.py"], additional_deps = [ ":bayesflow_py", diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py index 0a85862abf..6b42bca6f9 100644 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py @@ -36,29 +36,35 @@ class HaltonSequenceTest(test.TestCase): def test_known_values_small_bases(self): with self.test_session(): - # The first five elements of the Halton sequence with base 2 and 3 + # The first five elements of the non-randomized Halton sequence + # with base 2 and 3. expected = np.array(((1. / 2, 1. / 3), (1. / 4, 2. / 3), (3. / 4, 1. / 9), (1. / 8, 4. / 9), (5. / 8, 7. 
/ 9)), dtype=np.float32) - sample = halton.sample(2, num_samples=5) + sample = halton.sample(2, num_results=5, randomized=False) self.assertAllClose(expected, sample.eval(), rtol=1e-6) - def test_sample_indices(self): + def test_sequence_indices(self): + """Tests access of sequence elements by index.""" with self.test_session(): dim = 5 indices = math_ops.range(10, dtype=dtypes.int32) - sample_direct = halton.sample(dim, num_samples=10) - sample_from_indices = halton.sample(dim, sample_indices=indices) + sample_direct = halton.sample(dim, num_results=10, randomized=False) + sample_from_indices = halton.sample(dim, sequence_indices=indices, + randomized=False) self.assertAllClose(sample_direct.eval(), sample_from_indices.eval(), rtol=1e-6) def test_dtypes_works_correctly(self): + """Tests that all supported dtypes work without error.""" with self.test_session(): dim = 3 - sample_float32 = halton.sample(dim, num_samples=10, dtype=dtypes.float32) - sample_float64 = halton.sample(dim, num_samples=10, dtype=dtypes.float64) + sample_float32 = halton.sample(dim, num_results=10, dtype=dtypes.float32, + seed=11) + sample_float64 = halton.sample(dim, num_results=10, dtype=dtypes.float64, + seed=21) self.assertEqual(sample_float32.eval().dtype, np.float32) self.assertEqual(sample_float64.eval().dtype, np.float64) @@ -79,7 +85,8 @@ class HaltonSequenceTest(test.TestCase): p = normal_lib.Normal(loc=mu_p, scale=sigma_p) q = normal_lib.Normal(loc=mu_q, scale=sigma_q) - cdf_sample = halton.sample(2, num_samples=n, dtype=dtypes.float64) + cdf_sample = halton.sample(2, num_results=n, dtype=dtypes.float64, + seed=1729) q_sample = q.quantile(cdf_sample) # Compute E_p[X]. @@ -90,7 +97,7 @@ class HaltonSequenceTest(test.TestCase): # Compute E_p[X^2]. 
e_x2 = mc.expectation_importance_sampler( f=math_ops.square, log_p=p.log_prob, sampling_dist_q=q, z=q_sample, - seed=42) + seed=1412) stddev = math_ops.sqrt(e_x2 - math_ops.square(e_x)) # Keep the tolerance levels the same as in monte_carlo_test.py. @@ -100,10 +107,10 @@ class HaltonSequenceTest(test.TestCase): def test_docstring_example(self): # Produce the first 1000 members of the Halton sequence in 3 dimensions. - num_samples = 1000 + num_results = 1000 dim = 3 with self.test_session(): - sample = halton.sample(dim, num_samples=num_samples) + sample = halton.sample(dim, num_results=num_results, randomized=False) # Evaluate the integral of x_1 * x_2^2 * x_3^3 over the three dimensional # hypercube. @@ -115,16 +122,76 @@ class HaltonSequenceTest(test.TestCase): # Produces a relative absolute error of 1.7%. self.assertAllClose(integral.eval(), true_value.eval(), rtol=0.02) - # Now skip the first 1000 samples and recompute the integral with the next - # thousand samples. The sample_indices argument can be used to do this. + # Now skip the first 1000 samples and recompute the integral with the next + # thousand samples. The sequence_indices argument can be used to do this. - sample_indices = math_ops.range(start=1000, limit=1000 + num_samples, - dtype=dtypes.int32) - sample_leaped = halton.sample(dim, sample_indices=sample_indices) + sequence_indices = math_ops.range(start=1000, limit=1000 + num_results, + dtype=dtypes.int32) + sample_leaped = halton.sample(dim, sequence_indices=sequence_indices, + randomized=False) integral_leaped = math_ops.reduce_mean( math_ops.reduce_prod(sample_leaped ** powers, axis=-1)) - self.assertAllClose(integral_leaped.eval(), true_value.eval(), rtol=0.001) + self.assertAllClose(integral_leaped.eval(), true_value.eval(), rtol=0.05) + + def test_randomized_qmc_basic(self): + """Tests the randomization of the Halton sequences.""" + # This test is identical to the example given in Owen (2017), Figure 5. 
+ + dim = 20 + num_results = 2000 + replica = 5 + + with self.test_session(): + sample = halton.sample(dim, num_results=num_results, seed=121117) + f = math_ops.reduce_mean(math_ops.reduce_sum(sample, axis=1) ** 2) + values = [f.eval() for _ in range(replica)] + self.assertAllClose(np.mean(values), 101.6667, atol=np.std(values) * 2) + + def test_partial_sum_func_qmc(self): + """Tests the QMC evaluation of (x_j + x_{j+1} ...+x_{n})^2. + + A good test of QMC is provided by the function: + + f(x_1,..x_n, x_{n+1}, ..., x_{n+m}) = (x_{n+1} + ... x_{n+m} - m / 2)^2 + + with the coordinates taking values in the unit interval. The mean and + variance of this function (with the uniform distribution over the + unit-hypercube) is exactly calculable: + + <f> = m / 12, Var(f) = m (5m - 3) / 360 + + The purpose of the "shift" (if n > 0) in the coordinate dependence of the + function is to provide a test for Halton sequence which exhibit more + dependence in the higher axes. + + This test confirms that the mean squared error of RQMC estimation falls + as O(N^(2-e)) for any e>0. + """ + + n, m = 10, 10 + dim = n + m + num_results_lo, num_results_hi = 1000, 10000 + replica = 20 + true_mean = m / 12. + + def func_estimate(x): + return math_ops.reduce_mean( + (math_ops.reduce_sum(x[:, -m:], axis=-1) - m / 2.0) ** 2) + + with self.test_session(): + sample_lo = halton.sample(dim, num_results=num_results_lo, seed=1925) + sample_hi = halton.sample(dim, num_results=num_results_hi, seed=898128) + f_lo, f_hi = func_estimate(sample_lo), func_estimate(sample_hi) + + estimates = np.array([(f_lo.eval(), f_hi.eval()) for _ in range(replica)]) + var_lo, var_hi = np.mean((estimates - true_mean) ** 2, axis=0) + + # Expect that the variance scales as N^2 so var_hi / var_lo ~ k / 10^2 + # with k a fudge factor accounting for the residual N dependence + # of the QMC error and the sampling error. 
+ log_rel_err = np.log(100 * var_hi / var_lo) + self.assertAllClose(log_rel_err, 0.0, atol=1.2) if __name__ == '__main__': diff --git a/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py b/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py index 8cabf18903..35962109bc 100644 --- a/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py @@ -26,8 +26,9 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import math_ops - +from tensorflow.python.ops import random_ops __all__ = [ 'sample', @@ -39,32 +40,45 @@ __all__ = [ _MAX_DIMENSION = 1000 -def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): - r"""Returns a sample from the `m` dimensional Halton sequence. +def sample(dim, + num_results=None, + sequence_indices=None, + dtype=None, + randomized=True, + seed=None, + name=None): + r"""Returns a sample from the `dim` dimensional Halton sequence. Warning: The sequence elements take values only between 0 and 1. Care must be taken to appropriately transform the domain of a function if it differs from the unit cube before evaluating integrals using Halton samples. It is also - important to remember that quasi-random numbers are not a replacement for - pseudo-random numbers in every context. Quasi random numbers are completely - deterministic and typically have significant negative autocorrelation (unless - randomized). + important to remember that quasi-random numbers without randomization are not + a replacement for pseudo-random numbers in every context. Quasi random numbers + are completely deterministic and typically have significant negative + autocorrelation unless randomization is used. 
Computes the members of the low discrepancy Halton sequence in dimension - `dim`. The d-dimensional sequence takes values in the unit hypercube in d - dimensions. Currently, only dimensions up to 1000 are supported. The prime - base for the `k`-th axes is the k-th prime starting from 2. For example, - if dim = 3, then the bases will be [2, 3, 5] respectively and the first - element of the sequence will be: [0.5, 0.333, 0.2]. For a more complete - description of the Halton sequences see: + `dim`. The `dim`-dimensional sequence takes values in the unit hypercube in + `dim` dimensions. Currently, only dimensions up to 1000 are supported. The + prime base for the k-th axes is the k-th prime starting from 2. For example, + if `dim` = 3, then the bases will be [2, 3, 5] respectively and the first + element of the non-randomized sequence will be: [0.5, 0.333, 0.2]. For a more + complete description of the Halton sequences see: https://en.wikipedia.org/wiki/Halton_sequence. For low discrepancy sequences and their applications see: https://en.wikipedia.org/wiki/Low-discrepancy_sequence. - The user must supply either `num_samples` or `sample_indices` but not both. + If `randomized` is true, this function produces a scrambled version of the + Halton sequence introduced by Owen in arXiv:1706.02808. For the advantages of + randomization of low discrepancy sequences see: + https://en.wikipedia.org/wiki/Quasi-Monte_Carlo_method#Randomization_of_quasi-Monte_Carlo + + The number of samples produced is controlled by the `num_results` and + `sequence_indices` parameters. The user must supply either `num_results` or + `sequence_indices` but not both. The former is the number of samples to produce starting from the first - element. If `sample_indices` is given instead, the specified elements of - the sequence are generated. For example, sample_indices=tf.range(10) is + element. If `sequence_indices` is given instead, the specified elements of + the sequence are generated. 
For example, sequence_indices=tf.range(10) is equivalent to specifying n=10. Example Use: @@ -73,9 +87,9 @@ def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): bf = tf.contrib.bayesflow # Produce the first 1000 members of the Halton sequence in 3 dimensions. - num_samples = 1000 + num_results = 1000 dim = 3 - sample = bf.halton_sequence.sample(dim, num_samples=num_samples) + sample = bf.halton_sequence.sample(dim, num_results=num_results, seed=127) # Evaluate the integral of x_1 * x_2^2 * x_3^3 over the three dimensional # hypercube. @@ -89,12 +103,13 @@ def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): print ("Estimated: %f, True Value: %f" % values) # Now skip the first 1000 samples and recompute the integral with the next - # thousand samples. The sample_indices argument can be used to do this. + # thousand samples. The sequence_indices argument can be used to do this. - sample_indices = tf.range(start=1000, limit=1000 + num_samples, - dtype=tf.int32) - sample_leaped = halton.sample(dim, sample_indices=sample_indices) + sequence_indices = tf.range(start=1000, limit=1000 + num_results, + dtype=tf.int32) + sample_leaped = halton.sample(dim, sequence_indices=sequence_indices, + seed=111217) integral_leaped = tf.reduce_mean(tf.reduce_prod(sample_leaped ** powers, axis=-1)) @@ -107,51 +122,57 @@ def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): Args: dim: Positive Python `int` representing each sample's `event_size.` Must not be greater than 1000. - num_samples: (Optional) positive Python `int`. The number of samples to - generate. Either this parameter or sample_indices must be specified but + num_results: (Optional) positive Python `int`. The number of samples to + generate. Either this parameter or sequence_indices must be specified but not both. If this parameter is None, then the behaviour is determined by - the `sample_indices`. 
- sample_indices: (Optional) `Tensor` of dtype int32 and rank 1. The elements - of the sequence to compute specified by their position in the sequence. - The entries index into the Halton sequence starting with 0 and hence, - must be whole numbers. For example, sample_indices=[0, 5, 6] will produce - the first, sixth and seventh elements of the sequence. If this parameter - is None, then the `num_samples` parameter must be specified which gives - the number of desired samples starting from the first sample. + the `sequence_indices`. + sequence_indices: (Optional) `Tensor` of dtype int32 and rank 1. The + elements of the sequence to compute specified by their position in the + sequence. The entries index into the Halton sequence starting with 0 and + hence, must be whole numbers. For example, sequence_indices=[0, 5, 6] will + produce the first, sixth and seventh elements of the sequence. If this + parameter is None, then the `num_results` parameter must be specified + which gives the number of desired samples starting from the first sample. dtype: (Optional) The dtype of the sample. One of `float32` or `float64`. Default is `float32`. + randomized: (Optional) bool indicating whether to produce a randomized + Halton sequence. If True, applies the randomization described in + Owen (2017) [arXiv:1706.02808]. + seed: (Optional) Python integer to seed the random number generator. Only + used if `randomized` is True. If not supplied and `randomized` is True, + no seed is set. name: (Optional) Python `str` describing ops managed by this function. If not supplied the name of this function is used. Returns: halton_elements: Elements of the Halton sequence. 
`Tensor` of supplied dtype - and `shape` `[num_samples, dim]` if `num_samples` was specified or shape - `[s, dim]` where s is the size of `sample_indices` if `sample_indices` + and `shape` `[num_results, dim]` if `num_results` was specified or shape + `[s, dim]` where s is the size of `sequence_indices` if `sequence_indices` were specified. Raises: - ValueError: if both `sample_indices` and `num_samples` were specified or + ValueError: if both `sequence_indices` and `num_results` were specified or if dimension `dim` is less than 1 or greater than 1000. """ if dim < 1 or dim > _MAX_DIMENSION: raise ValueError( 'Dimension must be between 1 and {}. Supplied {}'.format(_MAX_DIMENSION, dim)) - if (num_samples is None) == (sample_indices is None): - raise ValueError('Either `num_samples` or `sample_indices` must be' + if (num_results is None) == (sequence_indices is None): + raise ValueError('Either `num_results` or `sequence_indices` must be' ' specified but not both.') dtype = dtype or dtypes.float32 if not dtype.is_floating: raise ValueError('dtype must be of `float`-type') - with ops.name_scope(name, 'sample', values=[sample_indices]): + with ops.name_scope(name, 'sample', values=[sequence_indices]): # Here and in the following, the shape layout is as follows: # [sample dimension, event dimension, coefficient dimension]. # The coefficient dimension is an intermediate axes which will hold the # weights of the starting integer when expressed in the (prime) base for # an event dimension. - indices = _get_indices(num_samples, sample_indices, dtype) + indices = _get_indices(num_results, sequence_indices, dtype) radixes = array_ops.constant(_PRIMES[0:dim], dtype=dtype, shape=[dim, 1]) max_sizes_by_axes = _base_expansion_size(math_ops.reduce_max(indices), @@ -170,17 +191,92 @@ def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): # though we don't need it. 
We avoid this by setting the exponents for each # axes to 0 beyond the maximum value needed for that dimension. exponents_by_axes = array_ops.tile([math_ops.range(max_size)], [dim, 1]) - weight_mask = exponents_by_axes > max_sizes_by_axes + + # The mask is true for those coefficients that are irrelevant. + weight_mask = exponents_by_axes >= max_sizes_by_axes capped_exponents = array_ops.where( weight_mask, array_ops.zeros_like(exponents_by_axes), exponents_by_axes) weights = radixes ** capped_exponents + # The following computes the base b expansion of the indices. Suppose, + # x = a0 + a1*b + a2*b^2 + ... Then, performing a floor div of x with + # the vector (1, b, b^2, b^3, ...) will produce + # (a0 + s1 * b, a1 + s2 * b, ...) where s_i are coefficients we don't care + # about. Noting that all a_i < b by definition of place value expansion, + # we see that taking the elements mod b of the above vector produces the + # place value expansion coefficients. coeffs = math_ops.floor_div(indices, weights) coeffs *= 1 - math_ops.cast(weight_mask, dtype) - coeffs = (coeffs % radixes) / radixes - return math_ops.reduce_sum(coeffs / weights, axis=-1) + coeffs %= radixes + if not randomized: + coeffs /= radixes + return math_ops.reduce_sum(coeffs / weights, axis=-1) + coeffs = _randomize(coeffs, radixes, seed=seed) + # Remove the contribution from randomizing the trailing zero for the + # axes where max_size_by_axes < max_size. This will be accounted + # for separately below (using zero_correction). + coeffs *= 1 - math_ops.cast(weight_mask, dtype) + coeffs /= radixes + base_values = math_ops.reduce_sum(coeffs / weights, axis=-1) + + # The randomization used in Owen (2017) does not leave 0 invariant. While + # we have accounted for the randomization of the first `max_size_by_axes` + # coefficients, we still need to correct for the trailing zeros. Luckily, + # this is equivalent to adding a uniform random value scaled so the first + # `max_size_by_axes` coefficients are zero. 
The following statements perform + # this correction. + zero_correction = random_ops.random_uniform([dim, 1], seed=seed, + dtype=dtype) + zero_correction /= (radixes ** max_sizes_by_axes) + return base_values + array_ops.reshape(zero_correction, [-1]) + + +def _randomize(coeffs, radixes, seed=None): + """Applies the Owen randomization to the coefficients.""" + given_dtype = coeffs.dtype + coeffs = math_ops.to_int32(coeffs) + num_coeffs = array_ops.shape(coeffs)[-1] + radixes = array_ops.reshape(math_ops.to_int32(radixes), [-1]) + perms = _get_permutations(num_coeffs, radixes, seed=seed) + perms = array_ops.reshape(perms, [-1]) + radix_sum = math_ops.reduce_sum(radixes) + radix_offsets = array_ops.reshape(math_ops.cumsum(radixes, exclusive=True), + [-1, 1]) + offsets = radix_offsets + math_ops.range(num_coeffs) * radix_sum + permuted_coeffs = array_ops.gather(perms, coeffs + offsets) + return math_ops.cast(permuted_coeffs, dtype=given_dtype) + + +def _get_permutations(num_results, dims, seed=None): + """Uniform iid sample from the space of permutations. + + Draws a sample of size `num_results` from the group of permutations of degrees + specified by the `dims` tensor. These are packed together into one tensor + such that each row is one sample from each of the dimensions in `dims`. For + example, if dims = [2,3] and num_results = 2, the result is a tensor of shape + [2, 2 + 3] and the first row of the result might look like: + [1, 0, 2, 0, 1]. The first two elements are a permutation over 2 elements + while the next three are a permutation over 3 elements. + + Args: + num_results: A positive scalar `Tensor` of integral type. The number of + draws from the discrete uniform distribution over the permutation groups. + dims: A 1D `Tensor` of the same dtype as `num_results`. The degree of the + permutation groups from which to sample. + seed: (Optional) Python integer to seed the random number generator. 
+ Returns: + permutations: A `Tensor` of shape `[num_results, sum(dims)]` and the same + dtype as `dims`. + """ + sample_range = math_ops.range(num_results) + def generate_one(d): + fn = lambda _: random_ops.random_shuffle(math_ops.range(d), seed=seed) + return functional_ops.map_fn(fn, sample_range) + return array_ops.concat([generate_one(d) for d in array_ops.unstack(dims)], + axis=-1) -def _get_indices(n, sample_indices, dtype, name=None): + +def _get_indices(n, sequence_indices, dtype, name=None): """Generates starting points for the Halton sequence procedure. The k'th element of the sequence is generated starting from a positive integer @@ -191,10 +287,10 @@ def _get_indices(n, sample_indices, dtype, name=None): Args: n: Positive `int`. The number of samples to generate. If this - parameter is supplied, then `sample_indices` should be None. - sample_indices: `Tensor` of dtype int32 and rank 1. The entries + parameter is supplied, then `sequence_indices` should be None. + sequence_indices: `Tensor` of dtype int32 and rank 1. The entries index into the Halton sequence starting with 0 and hence, must be whole - numbers. For example, sample_indices=[0, 5, 6] will produce the first, + numbers. For example, sequence_indices=[0, 5, 6] will produce the first, sixth and seventh elements of the sequence. If this parameter is not None then `n` must be None. dtype: The dtype of the sample. One of `float32` or `float64`. @@ -204,14 +300,14 @@ def _get_indices(n, sample_indices, dtype, name=None): Returns: indices: `Tensor` of dtype `dtype` and shape = `[n, 1, 1]`. 
""" - with ops.name_scope(name, 'get_indices', [n, sample_indices]): - if sample_indices is None: - sample_indices = math_ops.range(n, dtype=dtype) + with ops.name_scope(name, '_get_indices', [n, sequence_indices]): + if sequence_indices is None: + sequence_indices = math_ops.range(n, dtype=dtype) else: - sample_indices = math_ops.cast(sample_indices, dtype) + sequence_indices = math_ops.cast(sequence_indices, dtype) # Shift the indices so they are 1 based. - indices = sample_indices + 1 + indices = sequence_indices + 1 # Reshape to make space for the event dimension and the place value # coefficients. @@ -222,7 +318,7 @@ def _base_expansion_size(num, bases): """Computes the number of terms in the place value expansion. Let num = a0 + a1 b + a2 b^2 + ... ak b^k be the place value expansion of - `num` in base b (ak <> 0). This function computes and returns `k` for each + `num` in base b (ak <> 0). This function computes and returns `k+1` for each base `b` specified in `bases`. This can be inferred from the base `b` logarithm of `num` as follows: @@ -261,4 +357,5 @@ def _primes_less_than(n): _PRIMES = _primes_less_than(7919+1) + assert len(_PRIMES) == _MAX_DIMENSION -- GitLab From 9423044b971615027c86128adaa2cf2cfacb290a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 06:51:33 -0800 Subject: [PATCH 302/311] Improve LinearValidOnShape. It actually only needs to check that the operation is a bitcast (ignoring element_type). So far, the check was more restrictive, which made this function always return false for a non-trivial reshape operation. However we still fail to make use of this less strict checking, because for reshapes inside a fusion node, we don't have a layout and can therefore not check if it is a bitcast or not. Also add a disabled test that will be enabled once the layout issue is fixed. 
PiperOrigin-RevId: 187860440 --- tensorflow/compiler/xla/service/llvm_ir/ir_array.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index f3642cf0a1..9aa0ce507b 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -106,16 +106,13 @@ IrArray::IrArray(llvm::Value* base_ptr, const Shape& shape) } } -// Returns whether given linear index valid on given shape. +// Returns whether the given linear index is valid on the given shape. bool IrArray::Index::LinearValidOnShape(const Shape& a) const { - auto b = ShapeUtil::MakeShape(PRED /* irrelevant */, dims_); + auto b = ShapeUtil::MakeShape(a.element_type(), dims_); *b.mutable_layout() = layout_; return linear_ != nullptr && - ContainersEqual( - ShapeUtil::StripDegenerateDimensions(a).dimensions(), - ShapeUtil::StripDegenerateDimensions(b).dimensions()) && - LayoutUtil::Equal(ShapeUtil::StripDegenerateDimensions(a).layout(), - ShapeUtil::StripDegenerateDimensions(b).layout()); + ShapeUtil::ElementsIn(a) == ShapeUtil::ElementsIn(b) && + ShapeUtil::ReshapeIsBitcast(a, b); } IrArray::Index IrArray::Index::SourceIndexOfReshape( -- GitLab From 3a2e7635e69b5b1d1f510108d7a601edc570abc8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 07:43:58 -0800 Subject: [PATCH 303/311] Internal change. 
PiperOrigin-RevId: 187865303 --- tensorflow/contrib/lite/kernels/test_util.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h index 7d476ba1ea..a9064d54e7 100644 --- a/tensorflow/contrib/lite/kernels/test_util.h +++ b/tensorflow/contrib/lite/kernels/test_util.h @@ -39,10 +39,10 @@ inline std::vector Quantize(const std::vector& data, float scale, int32_t zero_point) { std::vector q; for (float f : data) { - q.push_back(std::max( + q.push_back(static_cast(std::max( std::numeric_limits::min(), - std::min(std::numeric_limits::max(), - static_cast(std::round(zero_point + (f / scale)))))); + std::min(std::numeric_limits::max(), + std::round(zero_point + (f / scale)))))); } return q; } -- GitLab From 5e53ba5a33ee116179bc4ac4f09be76811eb3960 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 09:01:22 -0800 Subject: [PATCH 304/311] Fix a case in SparseSegmentReduction ops with missing segment IDs, where all segment IDs are empty. Added a test for this case. PiperOrigin-RevId: 187873356 --- .../core/kernels/segment_reduction_ops.cc | 7 ++++++- .../segment_reduction_ops_test.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc index 27b8081eb8..bbf8696531 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.cc +++ b/tensorflow/core/kernels/segment_reduction_ops.cc @@ -616,7 +616,12 @@ class SparseSegmentReductionOpBase : public OpKernel { // we need to explicitly set missing indices to the default value. 
Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); - if (num_indices == 0) return; + if (num_indices == 0) { + if (output_rows > 0) { + output->flat_outer_dims().setConstant(default_value_); + } + return; + } OP_REQUIRES(context, output_rows > 0, errors::InvalidArgument("segment ids must be >= 0")); auto output_flat = output->flat_outer_dims(); diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 5a54f448d0..239a48d273 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -507,6 +507,25 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper): tf_ans = s.eval() self.assertAllClose(np_ans, tf_ans) + def testWithEmptySegments(self): + tf_x = constant_op.constant([], shape=[0, 4], dtype=dtypes_lib.float32) + ops_list = [ + math_ops.sparse_segment_sum_with_num_segments, + math_ops.sparse_segment_mean_with_num_segments + ] + segment_indices = [] + tf_indices = [] + num_segments = 5 + with self.test_session(use_gpu=False): + for tf_op in ops_list: + s = tf_op( + data=tf_x, + indices=tf_indices, + segment_ids=segment_indices, + num_segments=num_segments) + tf_ans = s.eval() + self.assertAllClose(np.zeros([5, 4]), tf_ans) + def testSegmentIdsGreaterThanZero(self): tf_x, np_x = self._input([10, 4], dtype=dtypes_lib.float32) ops_list = [(np.add, None, math_ops.sparse_segment_sum), ( -- GitLab From b0ee6b63b865d15ff722a74bbc89805e5e12c024 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Mon, 5 Mar 2018 09:18:24 -0800 Subject: [PATCH 305/311] Change the default ps_ops to STANDARD_PS_OPS PiperOrigin-RevId: 187875797 --- tensorflow/python/training/device_setter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/training/device_setter.py b/tensorflow/python/training/device_setter.py index 
0e824d89e9..d31c375b4c 100644 --- a/tensorflow/python/training/device_setter.py +++ b/tensorflow/python/training/device_setter.py @@ -179,8 +179,7 @@ def replica_device_setter(ps_tasks=0, ps_device="/job:ps", than overriding them. cluster: `ClusterDef` proto or `ClusterSpec`. ps_ops: List of strings representing `Operation` types that need to be - placed on `ps` devices. If `None`, defaults to - `["Variable", "VariableV2", "VarHandleOp"]`. + placed on `ps` devices. If `None`, defaults to `STANDARD_PS_OPS`. ps_strategy: A callable invoked for every ps `Operation` (i.e. matched by `ps_ops`), that takes the `Operation` and returns the ps task index to use. If `None`, defaults to a round-robin strategy across all `ps` @@ -210,7 +209,7 @@ def replica_device_setter(ps_tasks=0, ps_device="/job:ps", if ps_ops is None: # TODO(sherrym): Variables in the LOCAL_VARIABLES collection should not be # placed in the parameter server. - ps_ops = ["Variable", "VariableV2", "VarHandleOp"] + ps_ops = list(STANDARD_PS_OPS) if not merge_devices: logging.warning( -- GitLab From f547b77cd8aac0a2142e8f4bf80107fc52a4ef05 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 5 Mar 2018 09:51:38 -0800 Subject: [PATCH 306/311] [XLA:GPU] Add some VLOGs to FusionMerger. Also use c_any_of and friends instead of std::any_of &c, and make some minor whitespace fixes in comments. No functional change. 
PiperOrigin-RevId: 187880113 --- .../compiler/xla/service/gpu/fusion_merger.cc | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc index c137fbc97e..91a916f67c 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc @@ -45,6 +45,7 @@ void MaybeResolveTupleElements(HloInstruction* instruction, // Returns the bytes read by fusion parameter 'param', by returning the byte // size of 'param' shape (or the cumulative byte sizes of all leaf tuple // elements if 'param' is tuple-shaped). +// // In the special case where all users of 'param' (or all users of a leaf // tuple element if 'param' is tuple-shaped) are Slice instructions, the size // of each slice instruction is accumulated instead, to give a more accurate @@ -63,11 +64,10 @@ double CalculateBytesReadByFusionParameter(HloInstruction* param) { // Slice for a more accurate estimate of bytes read. double bytes = 0.0; for (auto& instruction : instructions) { - if (std::all_of(instruction->users().begin(), instruction->users().end(), - [](const HloInstruction* instruction) { - return instruction->opcode() == HloOpcode::kSlice || - instruction->opcode() == HloOpcode::kDynamicSlice; - })) { + if (c_all_of(instruction->users(), [](const HloInstruction* instruction) { + return instruction->opcode() == HloOpcode::kSlice || + instruction->opcode() == HloOpcode::kDynamicSlice; + })) { // All users are slice: accumulate bytes of all user slice instructions. for (auto& user : instruction->users()) { bytes += ShapeUtil::ByteSizeOf(user->shape()); @@ -199,6 +199,7 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { ++total_visited_; // Skip 'fusion' instruction if there are no users into which we can merge. 
if (fusion->users().empty()) { + VLOG(3) << "Not merging " << fusion->name() << ": Has no users."; ++num_fail_no_users_; return Status::OK(); } @@ -208,24 +209,26 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // Input fusion instructions need to be rooted at a particular HLO (e.g. // kReduce), so they shouldn't be further fused either. if (fusion->fusion_kind() != HloInstruction::FusionKind::kLoop) { + VLOG(3) << "Not merging " << fusion->name() << ": Is not loop fusion."; ++num_fail_not_loop_fusion_; return Status::OK(); } // Skip multiple output fusion. It's not yet supported. if (fusion->IsMultiOutputFusion()) { + VLOG(3) << "Not merging " << fusion->name() << ": Is multi-output fusion."; ++num_fail_not_loop_fusion_; return Status::OK(); } // Skip 'fusion' instruction if we cannot merge into all of its users. // Merging into all users enables the removal of 'fusion' from the // computation. - if (!std::all_of(fusion->users().begin(), fusion->users().end(), - [](const HloInstruction* instruction) { - return instruction->opcode() == HloOpcode::kFusion && - instruction->fusion_kind() == - HloInstruction::FusionKind::kLoop; - })) { + if (!c_all_of(fusion->users(), [](const HloInstruction* instruction) { + return instruction->opcode() == HloOpcode::kFusion && + instruction->fusion_kind() == HloInstruction::FusionKind::kLoop; + })) { + VLOG(3) << "Not merging " << fusion->name() + << ": Some of its users are not loop/input fusion kernels."; ++num_fail_merge_all_users_; return Status::OK(); } @@ -233,18 +236,17 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // Skip 'fusion' instruction if any of its fused instructions are expensive. // This is done to avoid the duplication of expensive instructions, which // would occur if 'fusion' were merged into multiple users. 
+ // // If 'fusion' has just one user, then an earlier fusion pass chose not to // fuse this producer/comsumer pair (likely because of expensive instruction // re-use by the consumer), and so we honor that choice here as well. - if (!std::all_of(fusion->fused_instructions().begin(), - fusion->fused_instructions().end(), - [](const HloInstruction* instruction) { - if (instruction->opcode() != HloOpcode::kParameter && - GpuInstructionFusion::IsExpensive(*instruction)) { - return false; - } - return true; - })) { + if (c_any_of(fusion->fused_instructions(), + [](const HloInstruction* instruction) { + return instruction->opcode() != HloOpcode::kParameter && + GpuInstructionFusion::IsExpensive(*instruction); + })) { + VLOG(3) << "Not merging " << fusion->name() + << ": Contains one or more expensive instructions."; ++num_fail_expensive_fused_instruction_; return Status::OK(); } @@ -253,6 +255,8 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // exceeds the threshold value. if (CalculateFlopsToBytesRatio(fusion) > FusionMerger::GetThresholdFlopsToBytesRatio()) { + VLOG(3) << "Not merging " << fusion->name() + << ": flops-to-bytes ratio is not favorable."; ++num_fail_flops_to_byte_ratio_; return Status::OK(); } @@ -265,6 +269,9 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { const double merged_to_current_bytes_ratio = merged_bytes_transferred / std::max(1.0, current_bytes_transferred); if (merged_to_current_bytes_ratio > 1.10) { + VLOG(3) << "Not merging " << fusion->name() + << ": merged-to-current-bytes-ratio of " + << merged_to_current_bytes_ratio << " is not favorable."; ++num_fail_net_bytes_transferred_ratio_; return Status::OK(); } -- GitLab From f09e7f9ebad85b3395628381777cba3e71f768a5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 10:07:27 -0800 Subject: [PATCH 307/311] Exposes poisson_regression_head in tf.contrib.estimator. 
PiperOrigin-RevId: 187882494 --- tensorflow/contrib/estimator/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py index 0f75b77050..6b9f9575b6 100644 --- a/tensorflow/contrib/estimator/__init__.py +++ b/tensorflow/contrib/estimator/__init__.py @@ -39,6 +39,7 @@ _allowed_symbols = [ 'multi_class_head', 'multi_head', 'multi_label_head', + 'poisson_regression_head', 'regression_head', 'DNNEstimator', 'DNNLinearCombinedEstimator', -- GitLab From 602f54c065eb9513ef3bb8557887d106637f96e5 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Mon, 5 Mar 2018 10:11:20 -0800 Subject: [PATCH 308/311] Make SavedModel builder validation accept signatures involving sparse tensors. PiperOrigin-RevId: 187883080 --- tensorflow/python/saved_model/builder_impl.py | 11 +-- .../python/saved_model/saved_model_test.py | 72 +++++++++++++++---- 2 files changed, 67 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py index 7347da7536..3447d917e9 100644 --- a/tensorflow/python/saved_model/builder_impl.py +++ b/tensorflow/python/saved_model/builder_impl.py @@ -193,7 +193,8 @@ class SavedModelBuilder(object): def _validate_tensor_info(self, tensor_info): """Validates the `TensorInfo` proto. - Checks if the `name` and `dtype` fields exist and are non-empty. + Checks if the `encoding` (`name` or `coo_sparse`) and `dtype` fields exist + and are non-empty. Args: tensor_info: `TensorInfo` protocol buffer to validate. 
@@ -206,10 +207,12 @@ class SavedModelBuilder(object): raise AssertionError( "All TensorInfo protos used in the SignatureDefs must have the name " "and dtype fields set.") - if not tensor_info.name: + if tensor_info.WhichOneof("encoding") is None: + # TODO(soergel) validate each of the fields of coo_sparse raise AssertionError( - "All TensorInfo protos used in the SignatureDefs must have the name " - "field set: %s" % tensor_info) + "All TensorInfo protos used in the SignatureDefs must have one of " + "the 'encoding' fields (e.g., name or coo_sparse) set: %s" + % tensor_info) if tensor_info.dtype is types_pb2.DT_INVALID: raise AssertionError( "All TensorInfo protos used in the SignatureDefs must have the dtype " diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py index d9d3168825..804255375e 100644 --- a/tensorflow/python/saved_model/saved_model_test.py +++ b/tensorflow/python/saved_model/saved_model_test.py @@ -94,7 +94,7 @@ class SavedModelTest(test.TestCase): self.assertEqual(expected_asset_file_name, asset.filename) self.assertEqual(expected_asset_tensor_name, asset.tensor_info.name) - def _validate_inputs_tensor_info(self, builder, tensor_info): + def _validate_inputs_tensor_info_fail(self, builder, tensor_info): with self.test_session(graph=ops.Graph()) as sess: self._init_and_validate_variable(sess, "v", 42) @@ -107,7 +107,18 @@ class SavedModelTest(test.TestCase): sess, ["foo"], signature_def_map={"foo_key": foo_signature}) - def _validate_outputs_tensor_info(self, builder, tensor_info): + def _validate_inputs_tensor_info_accept(self, builder, tensor_info): + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + foo_signature = signature_def_utils.build_signature_def({ + "foo_inputs": tensor_info + }, dict(), "foo") + builder.add_meta_graph_and_variables( + sess, ["foo"], + signature_def_map={"foo_key": foo_signature}) + + def 
_validate_outputs_tensor_info_fail(self, builder, tensor_info): with self.test_session(graph=ops.Graph()) as sess: self._init_and_validate_variable(sess, "v", 42) @@ -119,6 +130,16 @@ class SavedModelTest(test.TestCase): sess, ["foo"], signature_def_map={"foo_key": foo_signature}) + def _validate_outputs_tensor_info_accept(self, builder, tensor_info): + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + foo_signature = signature_def_utils.build_signature_def( + dict(), {"foo_outputs": tensor_info}, "foo") + builder.add_meta_graph_and_variables( + sess, ["foo"], + signature_def_map={"foo_key": foo_signature}) + def testMaybeSavedModelDir(self): base_path = test.test_src_dir_path("/python/saved_model") self.assertFalse(loader.maybe_saved_model_directory(base_path)) @@ -538,23 +559,50 @@ class SavedModelTest(test.TestCase): self.assertEqual("bar", bar_signature["bar_key"].method_name) self.assertEqual("foo_new", bar_signature["foo_key"].method_name) - def testSignatureDefValidation(self): - export_dir = self._get_export_dir("test_signature_def_validation") + def testSignatureDefValidationFails(self): + export_dir = self._get_export_dir("test_signature_def_validation_fail") builder = saved_model_builder.SavedModelBuilder(export_dir) - tensor_without_name = meta_graph_pb2.TensorInfo() - tensor_without_name.dtype = types_pb2.DT_FLOAT - self._validate_inputs_tensor_info(builder, tensor_without_name) - self._validate_outputs_tensor_info(builder, tensor_without_name) + tensor_without_encoding = meta_graph_pb2.TensorInfo() + tensor_without_encoding.dtype = types_pb2.DT_FLOAT + self._validate_inputs_tensor_info_fail(builder, tensor_without_encoding) + self._validate_outputs_tensor_info_fail(builder, tensor_without_encoding) tensor_without_dtype = meta_graph_pb2.TensorInfo() tensor_without_dtype.name = "x" - self._validate_inputs_tensor_info(builder, tensor_without_dtype) - self._validate_outputs_tensor_info(builder, 
tensor_without_dtype) + self._validate_inputs_tensor_info_fail(builder, tensor_without_dtype) + self._validate_outputs_tensor_info_fail(builder, tensor_without_dtype) tensor_empty = meta_graph_pb2.TensorInfo() - self._validate_inputs_tensor_info(builder, tensor_empty) - self._validate_outputs_tensor_info(builder, tensor_empty) + self._validate_inputs_tensor_info_fail(builder, tensor_empty) + self._validate_outputs_tensor_info_fail(builder, tensor_empty) + + def testSignatureDefValidationSucceedsWithName(self): + tensor_with_name = meta_graph_pb2.TensorInfo() + tensor_with_name.name = "foo" + tensor_with_name.dtype = types_pb2.DT_FLOAT + + export_dir = self._get_export_dir("test_signature_def_validation_name_1") + builder = saved_model_builder.SavedModelBuilder(export_dir) + self._validate_inputs_tensor_info_accept(builder, tensor_with_name) + + export_dir = self._get_export_dir("test_signature_def_validation_name_2") + builder = saved_model_builder.SavedModelBuilder(export_dir) + self._validate_outputs_tensor_info_accept(builder, tensor_with_name) + + def testSignatureDefValidationSucceedsWithCoo(self): + tensor_with_coo = meta_graph_pb2.TensorInfo() + # TODO(soergel) test validation of each of the fields of coo_sparse + tensor_with_coo.coo_sparse.values_tensor_name = "foo" + tensor_with_coo.dtype = types_pb2.DT_FLOAT + + export_dir = self._get_export_dir("test_signature_def_validation_coo_1") + builder = saved_model_builder.SavedModelBuilder(export_dir) + self._validate_inputs_tensor_info_accept(builder, tensor_with_coo) + + export_dir = self._get_export_dir("test_signature_def_validation_coo_2") + builder = saved_model_builder.SavedModelBuilder(export_dir) + self._validate_outputs_tensor_info_accept(builder, tensor_with_coo) def testAssets(self): export_dir = self._get_export_dir("test_assets") -- GitLab From 9f9bd5c71e5cc94d16e8295386445961880744ae Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 5 Mar 2018 10:47:24 -0800 Subject: [PATCH 309/311] Fix documentation of image size for inception-v3 (299 * 299) PiperOrigin-RevId: 187889122 --- tensorflow/contrib/lite/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index 00e93d2c4f..df8c1c623c 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -91,7 +91,7 @@ Currently, we only support building the Android demo app within a Python 2 environment (due to a Bazel bug). ### More about the demo -The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used (229 * 229 for Inception-v3). The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch. 224 * 224 (299 * 299) is the width and height of the image. 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. Both models have 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The model file must be downloaded and bundled within the assets directory of the app. +The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used (299 * 299 for Inception-v3). The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch. 224 * 224 (299 * 299) is the width and height of the image. 3 bytes represents three colors of a pixel. 
This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. Both models have 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The model file must be downloaded and bundled within the assets directory of the app. # iOS Demo App -- GitLab From 8382cbabf2a15f22d22a291fc47776113e6ec77c Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 5 Mar 2018 11:10:42 -0800 Subject: [PATCH 310/311] [XLA:GPU] Allow merging into input fusion nodes in FusionMerger. Seems to have been an oversight. "Input fusion" means that the *output* of the fusion node is the "real hero". The inputs aren't special; we can fuse more stuff in. PiperOrigin-RevId: 187892975 --- tensorflow/compiler/xla/service/gpu/BUILD | 2 + .../compiler/xla/service/gpu/fusion_merger.cc | 7 ++-- .../xla/service/gpu/fusion_merger_test.cc | 41 +++++++++++++++++++ .../xla/service/gpu/ir_emitter_unnested.cc | 7 ++++ 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 334efff1e6..cecbc25192 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -437,8 +437,10 @@ tf_cc_test( ":fusion_merger", ":instruction_fusion", "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla/service:hlo_matchers", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc index 91a916f67c..3cd30b754c 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc +++ 
b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc @@ -223,9 +223,10 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // Skip 'fusion' instruction if we cannot merge into all of its users. // Merging into all users enables the removal of 'fusion' from the // computation. - if (!c_all_of(fusion->users(), [](const HloInstruction* instruction) { - return instruction->opcode() == HloOpcode::kFusion && - instruction->fusion_kind() == HloInstruction::FusionKind::kLoop; + if (!c_all_of(fusion->users(), [](const HloInstruction* user) { + return user->opcode() == HloOpcode::kFusion && + (user->fusion_kind() == HloInstruction::FusionKind::kLoop || + user->fusion_kind() == HloInstruction::FusionKind::kInput); })) { VLOG(3) << "Not merging " << fusion->name() << ": Some of its users are not loop/input fusion kernels."; diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc index deef5966b8..c0def27525 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc @@ -16,13 +16,17 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace xla { namespace gpu { namespace { +namespace op = xla::testing::opcode_matchers; + class FusionMergerTest : public HloTestBase { protected: FusionMergerTest() : module_(CreateNewModule()) {} @@ -459,6 +463,43 @@ TEST_F(FusionMergerTest, BytesTransferredThresholdNotExeceeded) { EXPECT_TRUE(FusionMerger().Run(module_.get()).ValueOrDie()); } +// Check that we're willing to merge f1_computation into f2_computation, even +// though f2 is an input fusion node. +TEST_F(FusionMergerTest, WillMergeIntoInputFusion) { + const char* const kModule = R"( + HloModule m + + f1_computation { + f1_p0 = f32[10]{0} parameter(0) + ROOT f1_root = f32[10]{0} add(f1_p0, f1_p0) + } + + add_computation { + add_lhs = f32[] parameter(0) + add_rhs = f32[] parameter(1) + ROOT add_root = f32[] add(add_lhs, add_rhs) + } + + f2_computation { + f2_p0 = f32[10]{0} parameter(0) + f2_mul = f32[10]{0} multiply(f2_p0, f2_p0) + f2_zero = f32[] constant(0) + ROOT f2_root = f32[] reduce(f2_mul, f2_zero), dimensions={0}, + to_apply=add_computation + } + + ENTRY entry { + p0 = f32[10]{0} parameter(0) + f1 = f32[10]{0} fusion(p0), kind=kLoop, calls=f1_computation + ROOT f2 = f32[] fusion(f1), kind=kInput, calls=f2_computation + } + )"; + auto module = tools::Parse(kModule).ValueOrDie(); + EXPECT_TRUE(FusionMerger().Run(module.get()).ValueOrDie()); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Fusion(op::Parameter())); +} + } // namespace } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 
30c88c0a5d..065b3a0e31 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -535,6 +535,13 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { // If no operand has a compatible shape, prefer an operand that has // the same rank at least. for (const HloInstruction* operand : operands) { + // Skip tuple-shaped operands; calling ShapeUtil::Rank on a + // tuple-shaped Shape is illegal. Perhaps more correct would be to + // recurse into them, but TODO(kramerb): Remove this code after + // assigning layouts to fusion nodes. + if (ShapeUtil::IsTuple(operand->shape())) { + continue; + } if (ShapeUtil::Rank(*input_shape) == ShapeUtil::Rank(operand->shape())) { // Do not use CopyLayoutBetweenShapes because input_shape and -- GitLab From 7558b085afd4ba8ffb5d9ceab0616cc4ba0649b1 Mon Sep 17 00:00:00 2001 From: chengzhi chen Date: Tue, 6 Mar 2018 05:41:51 +0800 Subject: [PATCH 311/311] Lite: Supporting Raspberry Pi. (#16431) Now we can cross compiling or native compiling libtensorflow-lite.a for rpi. * Remove unnecessary space between $(CC_PREFIX) and gcc. * Adding -O3 -DNDEBUG CFLAGS same as CXXFLAGS. * Remove redundant -lpthread link flag. * Add Makefile for RPi. 
--- tensorflow/contrib/lite/Makefile | 9 +++-- tensorflow/contrib/lite/build_rpi_lib.sh | 22 +++++++++++ tensorflow/contrib/lite/g3doc/rpi.md | 50 ++++++++++++++++++++++++ tensorflow/contrib/lite/rpi_makefile.inc | 33 ++++++++++++++++ 4 files changed, 110 insertions(+), 4 deletions(-) create mode 100755 tensorflow/contrib/lite/build_rpi_lib.sh create mode 100644 tensorflow/contrib/lite/g3doc/rpi.md create mode 100644 tensorflow/contrib/lite/rpi_makefile.inc diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index 7f31629272..b4504f246a 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -27,10 +27,10 @@ LIBDIR := $(MAKEFILE_DIR)/gen/lib/ GENDIR := $(MAKEFILE_DIR)/gen/obj/ # Settings for the host compiler. -CXX := $(CC_PREFIX) gcc +CXX := $(CC_PREFIX)gcc CXXFLAGS := --std=c++11 -O3 -DNDEBUG -CC := $(CC_PREFIX) gcc -CFLAGS := +CC := $(CC_PREFIX)gcc +CFLAGS := -O3 -DNDEBUG LDOPTS := LDOPTS += -L/usr/local/lib ARFLAGS := -r @@ -57,10 +57,11 @@ LIBS := \ # If we're on Linux, also link in the dl library. ifeq ($(HOST_OS),LINUX) - LIBS += -ldl -lpthread + LIBS += -ldl endif include $(MAKEFILE_DIR)/ios_makefile.inc +include $(MAKEFILE_DIR)/rpi_makefile.inc # This library is the main target for this makefile. It will contain a minimal # runtime that can be linked in to other programs. diff --git a/tensorflow/contrib/lite/build_rpi_lib.sh b/tensorflow/contrib/lite/build_rpi_lib.sh new file mode 100755 index 0000000000..3824b16412 --- /dev/null +++ b/tensorflow/contrib/lite/build_rpi_lib.sh @@ -0,0 +1,22 @@ +#!/bin/bash -x +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR/../../.." + +CC_PREFIX=arm-linux-gnueabihf- make -j 3 -f tensorflow/contrib/lite/Makefile TARGET=RPI TARGET_ARCH=armv7 diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md new file mode 100644 index 0000000000..7a3a231626 --- /dev/null +++ b/tensorflow/contrib/lite/g3doc/rpi.md @@ -0,0 +1,50 @@ +# TensorFlow Lite for Raspberry Pi + +## Cross compiling +### Installing toolchain +This has been tested on Ubuntu 16.04.3 64bit and Tensorflow devel docker image [tensorflow/tensorflow:nightly-devel](https://hub.docker.com/r/tensorflow/tensorflow/tags/). + +To cross compile TensorFlow Lite, first install the toolchain and libs. +```bash +sudo apt-get update +sudo apt-get install crossbuild-essential-armhf +``` +> If you are using docker, you may not need `sudo` + +### Building +Clone this Tensorflow repository. Run this script at the root of the repository to download all the dependencies: +> The Tensorflow repository is in `/tensorflow` if you are using `tensorflow/tensorflow:nightly-devel` docker image, just try it. +```bash +./tensorflow/contrib/lite/download_dependencies.sh +``` +Note that you only need to do this once. + +You should then be able to compile: +```bash +./tensorflow/contrib/lite/build_rpi_lib.sh +``` + +This should compile a static library in: +`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`.
+ +## Native compiling +This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc version 6.3.0 20170516 (Raspbian 6.3.0-18+rpi1). + +Log in to your RPI and install the toolchain. +```bash +sudo apt-get install build-essential +``` + +First, clone this TensorFlow repository. Run this at the root of the repository: +```bash +./tensorflow/contrib/lite/download_dependencies.sh +``` +Note that you only need to do this once. + +You should then be able to compile: +```bash +./tensorflow/contrib/lite/build_rpi_lib.sh +``` + +This should compile a static library in: +`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. diff --git a/tensorflow/contrib/lite/rpi_makefile.inc b/tensorflow/contrib/lite/rpi_makefile.inc new file mode 100644 index 0000000000..832ef5824b --- /dev/null +++ b/tensorflow/contrib/lite/rpi_makefile.inc @@ -0,0 +1,33 @@ +# Settings for Raspberry Pi. +ifeq ($(TARGET), RPI) + ifeq ($(TARGET_ARCH), armv7) + CXXFLAGS += \ + -march=armv7-a \ + -mfpu=neon-vfpv4 \ + -funsafe-math-optimizations \ + -ftree-vectorize + + CCFLAGS += \ + -march=armv7-a \ + -mfpu=neon-vfpv4 \ + -funsafe-math-optimizations \ + -ftree-vectorize + + LDFLAGS := \ + -Wl,--no-export-dynamic \ + -Wl,--exclude-libs,ALL \ + -Wl,--gc-sections \ + -Wl,--as-needed + endif + + LIBS := \ + -lstdc++ \ + -lpthread \ + -lm \ + -ldl + + OBJDIR := $(OBJDIR)rpi_$(TARGET_ARCH)/ + LIBDIR := $(LIBDIR)rpi_$(TARGET_ARCH)/ + BINDIR := $(BINDIR)rpi_$(TARGET_ARCH)/ + DEPDIR := $(DEPDIR)rpi_$(TARGET_ARCH)/ +endif -- GitLab